From 2cdcfe46d8959b0cbd8aea3b4439b30a55d47f00 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Thu, 8 Jun 2023 17:43:16 -0300
Subject: [PATCH] Remove barrier on Intel if control flow is potentially
 divergent (#5044)

* Remove barrier on Intel if control flow is potentially divergent

* Shader cache version bump
---
 src/Ryujinx.Graphics.GAL/Capabilities.cs      |  3 ++
 .../Shader/DiskCache/DiskCacheHostStorage.cs  |  2 +-
 .../Shader/GpuAccessorBase.cs                 |  2 ++
 src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs |  2 ++
 .../CodeGen/Glsl/GlslGenerator.cs             | 32 ++++++++++++++++---
 .../CodeGen/Spirv/CodeGenContext.cs           |  7 +++-
 .../CodeGen/Spirv/Instructions.cs             | 12 +++++++
 .../CodeGen/Spirv/SpirvGenerator.cs           |  2 +-
 src/Ryujinx.Graphics.Shader/IGpuAccessor.cs   |  9 ++++++
 src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs |  1 +
 10 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs
index f2dd0963fb..3b6e6b906b 100644
--- a/src/Ryujinx.Graphics.GAL/Capabilities.cs
+++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs
@@ -34,6 +34,7 @@ namespace Ryujinx.Graphics.GAL
         public readonly bool SupportsCubemapView;
         public readonly bool SupportsNonConstantTextureOffset;
         public readonly bool SupportsShaderBallot;
+        public readonly bool SupportsShaderBarrierDivergence;
         public readonly bool SupportsShaderFloat64;
         public readonly bool SupportsTextureShadowLod;
         public readonly bool SupportsViewportIndexVertexTessellation;
@@ -82,6 +83,7 @@ namespace Ryujinx.Graphics.GAL
             bool supportsCubemapView,
             bool supportsNonConstantTextureOffset,
             bool supportsShaderBallot,
+            bool supportsShaderBarrierDivergence,
             bool supportsShaderFloat64,
             bool supportsTextureShadowLod,
             bool supportsViewportIndexVertexTessellation,
@@ -126,6 +128,7 @@ namespace Ryujinx.Graphics.GAL
             SupportsCubemapView = supportsCubemapView;
             SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
             SupportsShaderBallot = supportsShaderBallot;
+            SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
             SupportsShaderFloat64 = supportsShaderFloat64;
             SupportsTextureShadowLod = supportsTextureShadowLod;
             SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
index 9419ea92c1..f35b542a28 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         private const ushort FileFormatVersionMajor = 1;
         private const ushort FileFormatVersionMinor = 2;
         private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
-        private const uint CodeGenVersion = 5159;
+        private const uint CodeGenVersion = 5044;
 
         private const string SharedTocFileName = "shared.toc";
         private const string SharedDataFileName = "shared.data";
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
index a60564e0e2..57e79ac7f4 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
@@ -141,6 +141,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
         public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
 
+        public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence;
+
         public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64;
 
         public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;
diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
index 234340e5f0..81faa00eff 100644
--- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
+++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
@@ -127,6 +127,7 @@ namespace Ryujinx.Graphics.OpenGL
         public Capabilities GetCapabilities()
         {
             bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows;
+            bool intelUnix = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelUnix;
             bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows;
 
             return new Capabilities(
@@ -158,6 +159,7 @@ namespace Ryujinx.Graphics.OpenGL
                 supportsCubemapView: true,
                 supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
                 supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
+                supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
                 supportsShaderFloat64: true,
                 supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
                 supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs
index 751d035075..fe0d275b64 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs
@@ -28,18 +28,18 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
 
                 for (int i = 1; i < info.Functions.Count; i++)
                 {
-                    PrintFunction(context, info, info.Functions[i]);
+                    PrintFunction(context, info.Functions[i]);
 
                     context.AppendLine();
                 }
             }
 
-            PrintFunction(context, info, info.Functions[0], MainFunctionName);
+            PrintFunction(context, info.Functions[0], MainFunctionName);
 
             return context.GetCode();
         }
 
-        private static void PrintFunction(CodeGenContext context, StructuredProgramInfo info, StructuredFunction function, string funcName = null)
+        private static void PrintFunction(CodeGenContext context, StructuredFunction function, string funcName = null)
         {
             context.CurrentFunction = function;
 
@@ -48,7 +48,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
 
             Declarations.DeclareLocals(context, function);
 
-            PrintBlock(context, function.MainBlock);
+            PrintBlock(context, function.MainBlock, funcName == MainFunctionName);
 
             context.LeaveScope();
         }
@@ -72,7 +72,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
             return $"{Declarations.GetVarTypeName(context, function.ReturnType)} {funcName ?? function.Name}({string.Join(", ", args)})";
         }
 
-        private static void PrintBlock(CodeGenContext context, AstBlock block)
+        private static void PrintBlock(CodeGenContext context, AstBlock block, bool isMainFunction)
         {
             AstBlockVisitor visitor = new AstBlockVisitor(block);
 
@@ -112,10 +112,32 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                 }
             };
 
+            bool supportsBarrierDivergence = context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence();
+            bool mayHaveReturned = false;
+
             foreach (IAstNode node in visitor.Visit())
             {
                 if (node is AstOperation operation)
                 {
+                    if (!supportsBarrierDivergence)
+                    {
+                        if (operation.Inst == IntermediateRepresentation.Instruction.Barrier)
+                        {
+                            // A barrier on a potentially divergent control flow path may cause the GPU to hang,
+                            // so skip it outside the function's main block, after a return, or outside the main function.
+                            if (visitor.Block.Type != AstBlockType.Main || mayHaveReturned || !isMainFunction)
+                            {
+                                context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed.");
+
+                                continue;
+                            }
+                        }
+                        else if (operation.Inst == IntermediateRepresentation.Instruction.Return)
+                        {
+                            mayHaveReturned = true;
+                        }
+                    }
+
                     string expr = InstGen.GetExpression(context, operation);
 
                     if (expr != null)
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs
index c1bfa08836..1f5167e667 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs
@@ -76,6 +76,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
 
         public SpirvDelegates Delegates { get; }
 
+        public bool IsMainFunction { get; private set; }
+        public bool MayHaveReturned { get; set; }
+
         public CodeGenContext(
             StructuredProgramInfo info,
             ShaderConfig config,
@@ -108,8 +111,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
             Delegates = new SpirvDelegates(this);
         }
 
-        public void StartFunction()
+        public void StartFunction(bool isMainFunction)
         {
+            IsMainFunction = isMainFunction;
+            MayHaveReturned = false;
             _locals.Clear();
             _localForArgs.Clear();
             _funcArgs.Clear();
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
index 4be0c62be8..6c11575250 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
@@ -242,6 +242,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
 
         private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
         {
+            // A barrier on a potentially divergent control flow path may cause the GPU to hang,
+            // so skip it outside the function's main block, after a return, or outside the main function.
+            if (!context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence() &&
+                (context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction))
+            {
+                context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed.");
+
+                return OperationResult.Invalid;
+            }
+
             context.ControlBarrier(
                 context.Constant(context.TypeU32(), Scope.Workgroup),
                 context.Constant(context.TypeU32(), Scope.Workgroup),
@@ -1092,6 +1102,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
 
         private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation)
         {
+            context.MayHaveReturned = true;
+
             if (operation.SourcesCount != 0)
             {
                 context.ReturnValue(context.Get(context.CurrentFunction.ReturnType, operation.GetSource(0)));
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs
index a55e09fd3f..5c736b605e 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs
@@ -148,7 +148,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
 
             context.CurrentFunction = function;
             context.AddFunction(spvFunc);
-            context.StartFunction();
+            context.StartFunction(isMainFunction: funcIndex == 0);
 
             Declarations.DeclareParameters(context, function);
 
diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
index d4f99e11c8..d3794cdddf 100644
--- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
+++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -331,6 +331,15 @@ namespace Ryujinx.Graphics.Shader
             return true;
         }
 
+        /// <summary>
+        /// Queries host GPU shader support for barrier instructions on potentially divergent control flow paths.
+        /// </summary>
+        /// <returns>True if the GPU supports barriers on divergent control flow paths, false if such barriers must not be emitted. Defaults to true</returns>
+        bool QueryHostSupportsShaderBarrierDivergence()
+        {
+            return true;
+        }
+
         /// <summary>
         /// Queries host GPU support for 64-bit floating point (double precision) operations on the shader.
         /// </summary>
diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
index 0daec00c33..a059d683ab 100644
--- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
+++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
@@ -595,6 +595,7 @@ namespace Ryujinx.Graphics.Vulkan
                 supportsCubemapView: !IsAmdGcn,
                 supportsNonConstantTextureOffset: false,
                 supportsShaderBallot: false,
+                supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
                 supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
                 supportsTextureShadowLod: false,
                 supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,