From 2e43d01d3658d82f98c9eeea8280d8ec122c0c6b Mon Sep 17 00:00:00 2001 From: gdkchan Date: Fri, 18 Nov 2022 23:27:54 -0300 Subject: [PATCH] Move gl_Layer from vertex to geometry if GPU does not support it on vertex (#3866) * Move gl_Layer from vertex to geometry if GPU does not support it on vertex * Shader cache version bump * PR feedback --- Ryujinx.Graphics.GAL/Capabilities.cs | 3 + .../Shader/DiskCache/DiskCacheHostStorage.cs | 2 +- .../DiskCache/ParallelDiskCacheLoader.cs | 12 +++ .../Shader/GpuAccessorBase.cs | 2 + Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 12 +++ Ryujinx.Graphics.OpenGL/HwCapabilities.cs | 2 + Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs | 3 +- Ryujinx.Graphics.Shader/IGpuAccessor.cs | 9 ++ .../Instructions/InstEmitAttribute.cs | 14 ++- .../Translation/ShaderConfig.cs | 38 ++++++++ .../Translation/Translator.cs | 12 +-- .../Translation/TranslatorContext.cs | 97 ++++++++++++++++++- Ryujinx.Graphics.Vulkan/VulkanRenderer.cs | 1 + 13 files changed, 190 insertions(+), 17 deletions(-) diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs index 3138a43b5b..60f93bc4c5 100644 --- a/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/Ryujinx.Graphics.GAL/Capabilities.cs @@ -21,6 +21,7 @@ namespace Ryujinx.Graphics.GAL public readonly bool SupportsFragmentShaderOrderingIntel; public readonly bool SupportsGeometryShaderPassthrough; public readonly bool SupportsImageLoadFormatted; + public readonly bool SupportsLayerVertexTessellation; public readonly bool SupportsMismatchingViewFormat; public readonly bool SupportsCubemapView; public readonly bool SupportsNonConstantTextureOffset; @@ -55,6 +56,7 @@ namespace Ryujinx.Graphics.GAL bool supportsFragmentShaderOrderingIntel, bool supportsGeometryShaderPassthrough, bool supportsImageLoadFormatted, + bool supportsLayerVertexTessellation, bool supportsMismatchingViewFormat, bool supportsCubemapView, bool supportsNonConstantTextureOffset, @@ -86,6 +88,7 @@ namespace Ryujinx.Graphics.GAL SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel; SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough; SupportsImageLoadFormatted = supportsImageLoadFormatted; + SupportsLayerVertexTessellation = supportsLayerVertexTessellation; SupportsMismatchingViewFormat = supportsMismatchingViewFormat; SupportsCubemapView = supportsCubemapView; SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset; diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 7923a393e4..69067fe6f3 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 3863; + private const uint CodeGenVersion = 3866; private const string SharedTocFileName = "shared.toc"; private const string SharedDataFileName = "shared.data"; diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs index 04d93bba5a..9261cb0dc3 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs @@ -636,6 +636,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length]; List translatedStages = new List(); + TranslatorContext previousStage = null; + for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) { TranslatorContext currentStage = translatorContexts[stageIndex + 1]; @@ -668,6 +670,16 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache { translatedStages.Add(program); } + + previousStage = currentStage; + } + else if ( + previousStage != null && + previousStage.LayerOutputWritten && + stageIndex == 3 && + !_context.Capabilities.SupportsLayerVertexTessellation) + { + translatedStages.Add(previousStage.GenerateGeometryPassthrough()); } } diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs index 40c5ed64b7..9648298e9d 100644 --- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs +++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs @@ -128,6 +128,8 @@ namespace Ryujinx.Graphics.Gpu.Shader public bool QueryHostSupportsImageLoadFormatted() => _context.Capabilities.SupportsImageLoadFormatted; + public bool QueryHostSupportsLayerVertexTessellation() => _context.Capabilities.SupportsLayerVertexTessellation; + public bool QueryHostSupportsNonConstantTextureOffset() => _context.Capabilities.SupportsNonConstantTextureOffset; public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot; diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 2a9dd6a5c6..3eaab79f05 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -356,6 +356,8 @@ namespace Ryujinx.Graphics.Gpu.Shader CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1]; List shaderSources = new List(); + TranslatorContext previousStage = null; + for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) { TranslatorContext currentStage = translatorContexts[stageIndex + 1]; @@ -392,6 +394,16 @@ namespace Ryujinx.Graphics.Gpu.Shader { shaderSources.Add(CreateShaderSource(program)); } + + previousStage = currentStage; + } + else if ( + previousStage != null && + previousStage.LayerOutputWritten && + stageIndex == 3 && + !_context.Capabilities.SupportsLayerVertexTessellation) + { + shaderSources.Add(CreateShaderSource(previousStage.GenerateGeometryPassthrough())); } } diff --git a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs index ba2cc2df4b..8caf11dd54 100644 --- a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs +++ b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs @@ -18,6 +18,7 @@ namespace Ryujinx.Graphics.OpenGL private static readonly Lazy _supportsQuads = new Lazy(SupportsQuadsCheck); private static readonly Lazy _supportsSeamlessCubemapPerTexture = new Lazy(() => HasExtension("GL_ARB_seamless_cubemap_per_texture")); private static readonly Lazy _supportsShaderBallot = new Lazy(() => HasExtension("GL_ARB_shader_ballot")); + private static readonly Lazy _supportsShaderViewportLayerArray = new Lazy(() => HasExtension("GL_ARB_shader_viewport_layer_array")); private static readonly Lazy _supportsTextureCompressionBptc = new Lazy(() => HasExtension("GL_EXT_texture_compression_bptc")); private static readonly Lazy _supportsTextureCompressionRgtc = new Lazy(() => HasExtension("GL_EXT_texture_compression_rgtc")); private static readonly Lazy _supportsTextureCompressionS3tc = new Lazy(() => HasExtension("GL_EXT_texture_compression_s3tc")); @@ -61,6 +62,7 @@ namespace Ryujinx.Graphics.OpenGL public static bool SupportsQuads => _supportsQuads.Value; public static bool SupportsSeamlessCubemapPerTexture => _supportsSeamlessCubemapPerTexture.Value; public static bool SupportsShaderBallot => _supportsShaderBallot.Value; + public static bool SupportsShaderViewportLayerArray => _supportsShaderViewportLayerArray.Value; public static bool SupportsTextureCompressionBptc => _supportsTextureCompressionBptc.Value; public static bool SupportsTextureCompressionRgtc => _supportsTextureCompressionRgtc.Value; public static bool SupportsTextureCompressionS3tc => _supportsTextureCompressionS3tc.Value; diff --git a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index 418976e663..e26fe6b6c6 100644 --- a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -117,12 +117,13 @@ namespace Ryujinx.Graphics.OpenGL supportsFragmentShaderOrderingIntel: HwCapabilities.SupportsFragmentShaderOrdering, supportsGeometryShaderPassthrough: HwCapabilities.SupportsGeometryShaderPassthrough, supportsImageLoadFormatted: HwCapabilities.SupportsImageLoadFormatted, + supportsLayerVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray, supportsMismatchingViewFormat: HwCapabilities.SupportsMismatchingViewFormat, supportsCubemapView: true, supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset, supportsShaderBallot: HwCapabilities.SupportsShaderBallot, supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod, - supportsViewportIndex: true, + supportsViewportIndex: HwCapabilities.SupportsShaderViewportLayerArray, supportsViewportSwizzle: HwCapabilities.SupportsViewportSwizzle, supportsIndirectParameters: HwCapabilities.SupportsIndirectParameters, maximumUniformBuffersPerStage: 13, // TODO: Avoid hardcoding those limits here and get from driver? diff --git a/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/Ryujinx.Graphics.Shader/IGpuAccessor.cs index f05a8527ba..cc690eedd1 100644 --- a/Ryujinx.Graphics.Shader/IGpuAccessor.cs +++ b/Ryujinx.Graphics.Shader/IGpuAccessor.cs @@ -258,6 +258,15 @@ namespace Ryujinx.Graphics.Shader return true; } + /// + /// Queries host support for writes to Layer from vertex or tessellation shader stages. + /// + /// True if writes to layer from vertex or tessellation are supported, false otherwise + bool QueryHostSupportsLayerVertexTessellation() + { + return true; + } + /// /// Queries host GPU non-constant texture offset support. /// diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs index 2f75d248b1..9f9ac1412d 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs @@ -278,13 +278,21 @@ namespace Ryujinx.Graphics.Shader.Instructions private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, bool isOutput) { - if (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0) + bool supportsLayerFromVertexOrTess = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation(); + int fixedStartAttr = supportsLayerFromVertexOrTess ? 0 : 1; + + if (attr == AttributeConsts.Layer && config.Stage != ShaderStage.Geometry && !supportsLayerFromVertexOrTess) { - attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.FrontColorDiffuseR, 0, isOutput); + attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.Layer, 0, isOutput); + config.SetLayerOutputAttribute(attr); + } + else if (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0) + { + attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.FrontColorDiffuseR, fixedStartAttr, isOutput); } else if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd) { - attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.TexCoordBase, 4, isOutput); + attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.TexCoordBase, fixedStartAttr + 4, isOutput); } return attr; diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index fcf35ce276..ae4107e80d 100644 --- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -48,6 +48,9 @@ namespace Ryujinx.Graphics.Shader.Translation public int Cb1DataSize { get; private set; } + public bool LayerOutputWritten { get; private set; } + public int LayerOutputAttribute { get; private set; } + public bool NextUsesFixedFuncAttributes { get; private set; } public int UsedInputAttributes { get; private set; } public int UsedOutputAttributes { get; private set; } @@ -131,6 +134,20 @@ namespace Ryujinx.Graphics.Shader.Translation _usedImages = new Dictionary(); } + public ShaderConfig( + ShaderStage stage, + OutputTopology outputTopology, + int maxOutputVertices, + IGpuAccessor gpuAccessor, + TranslationOptions options) : this(gpuAccessor, options) + { + Stage = stage; + ThreadsPerInputPrimitive = 1; + OutputTopology = outputTopology; + MaxOutputVertices = maxOutputVertices; + TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled(); + } + public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(gpuAccessor, options) { Stage = header.Stage; @@ -240,6 +257,12 @@ namespace Ryujinx.Graphics.Shader.Translation } } + public void SetLayerOutputAttribute(int attr) + { + LayerOutputWritten = true; + LayerOutputAttribute = attr; + } + public void SetInputUserAttributeFixedFunc(int index) { UsedInputAttributes |= 1 << index; @@ -694,5 +717,20 @@ namespace Ryujinx.Graphics.Shader.Translation { return FindDescriptorIndex(GetImageDescriptors(), texOp); } + + public ShaderProgramInfo CreateProgramInfo() + { + return new ShaderProgramInfo( + GetConstantBufferDescriptors(), + GetStorageBufferDescriptors(), + GetTextureDescriptors(), + GetImageDescriptors(), + Stage, + UsedFeatures.HasFlag(FeatureFlags.InstanceId), + UsedFeatures.HasFlag(FeatureFlags.DrawParameters), + UsedFeatures.HasFlag(FeatureFlags.RtLayer), + ClipDistancesWritten, + OmapTargets); + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Translation/Translator.cs b/Ryujinx.Graphics.Shader/Translation/Translator.cs index 8253919608..f8795c0ff5 100644 --- a/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -79,17 +79,7 @@ namespace Ryujinx.Graphics.Shader.Translation var sInfo = StructuredProgram.MakeStructuredProgram(funcs, config); - var info = new ShaderProgramInfo( - config.GetConstantBufferDescriptors(), - config.GetStorageBufferDescriptors(), - config.GetTextureDescriptors(), - config.GetImageDescriptors(), - config.Stage, - config.UsedFeatures.HasFlag(FeatureFlags.InstanceId), - config.UsedFeatures.HasFlag(FeatureFlags.DrawParameters), - config.UsedFeatures.HasFlag(FeatureFlags.RtLayer), - config.ClipDistancesWritten, - config.OmapTargets); + var info = config.CreateProgramInfo(); return config.Options.TargetLanguage switch { diff --git a/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index 7d820f033f..127f84a673 100644 --- a/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -1,7 +1,12 @@ -using Ryujinx.Graphics.Shader.Decoders; +using Ryujinx.Graphics.Shader.CodeGen.Glsl; +using Ryujinx.Graphics.Shader.CodeGen.Spirv; +using Ryujinx.Graphics.Shader.Decoders; using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using System; using System.Collections.Generic; using System.Linq; +using System.Numerics; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; using static Ryujinx.Graphics.Shader.Translation.Translator; @@ -18,6 +23,7 @@ namespace Ryujinx.Graphics.Shader.Translation public ShaderStage Stage => _config.Stage; public int Size => _config.Size; public int Cb1DataSize => _config.Cb1DataSize; + public bool LayerOutputWritten => _config.LayerOutputWritten; public IGpuAccessor GpuAccessor => _config.GpuAccessor; @@ -149,5 +155,94 @@ namespace Ryujinx.Graphics.Shader.Translation return Translator.Translate(code, _config); } + + public ShaderProgram GenerateGeometryPassthrough() + { + int outputAttributesMask = _config.UsedOutputAttributes; + int layerOutputAttr = _config.LayerOutputAttribute; + + OutputTopology outputTopology; + int maxOutputVertices; + + switch (GpuAccessor.QueryPrimitiveTopology()) + { + case InputTopology.Points: + outputTopology = OutputTopology.PointList; + maxOutputVertices = 1; + break; + case InputTopology.Lines: + case InputTopology.LinesAdjacency: + outputTopology = OutputTopology.LineStrip; + maxOutputVertices = 2; + break; + default: + outputTopology = OutputTopology.TriangleStrip; + maxOutputVertices = 3; + break; + } + + ShaderConfig config = new ShaderConfig(ShaderStage.Geometry, outputTopology, maxOutputVertices, GpuAccessor, _config.Options); + + EmitterContext context = new EmitterContext(default, config, false); + + for (int v = 0; v < maxOutputVertices; v++) + { + int outAttrsMask = outputAttributesMask; + + while (outAttrsMask != 0) + { + int attrIndex = BitOperations.TrailingZeroCount(outAttrsMask); + + outAttrsMask &= ~(1 << attrIndex); + + for (int c = 0; c < 4; c++) + { + int attr = AttributeConsts.UserAttributeBase + attrIndex * 16 + c * 4; + + Operand value = context.LoadAttribute(Const(attr), Const(0), Const(v)); + + if (attr == layerOutputAttr) + { + context.Copy(Attribute(AttributeConsts.Layer), value); + } + else + { + context.Copy(Attribute(attr), value); + config.SetOutputUserAttribute(attrIndex); + } + + config.SetInputUserAttribute(attrIndex, c); + } + } + + for (int c = 0; c < 4; c++) + { + int attr = AttributeConsts.PositionX + c * 4; + + Operand value = context.LoadAttribute(Const(attr), Const(0), Const(v)); + + context.Copy(Attribute(attr), value); + } + + context.EmitVertex(); + } + + context.EndPrimitive(); + + var operations = context.GetOperations(); + var cfg = ControlFlowGraph.Create(operations); + var function = new Function(cfg.Blocks, "main", false, 0, 0); + + var sInfo = StructuredProgram.MakeStructuredProgram(new[] { function }, config); + + var info = config.CreateProgramInfo(); + + return config.Options.TargetLanguage switch + { + TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config)), + TargetLanguage.Spirv => new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config)), + _ => throw new NotImplementedException(config.Options.TargetLanguage.ToString()) + }; + } } } diff --git a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index 4a905d33de..3f8ebe67be 100644 --- a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -396,6 +396,7 @@ namespace Ryujinx.Graphics.Vulkan supportsFragmentShaderOrderingIntel: false, supportsGeometryShaderPassthrough: Capabilities.SupportsGeometryShaderPassthrough, supportsImageLoadFormatted: features2.Features.ShaderStorageImageReadWithoutFormat, + supportsLayerVertexTessellation: featuresVk12.ShaderOutputLayer, supportsMismatchingViewFormat: true, supportsCubemapView: !IsAmdGcn, supportsNonConstantTextureOffset: false,