forked from Mirror/Ryujinx
Remove barrier on Intel if control flow is potentially divergent (#5044)
* Remove barrier on Intel if control flow is potentially divergent
* Shader cache version bump
This commit is contained in:
parent
fe30c03cac
commit
2cdcfe46d8
10 changed files with 64 additions and 8 deletions
|
@ -34,6 +34,7 @@ namespace Ryujinx.Graphics.GAL
|
||||||
public readonly bool SupportsCubemapView;
|
public readonly bool SupportsCubemapView;
|
||||||
public readonly bool SupportsNonConstantTextureOffset;
|
public readonly bool SupportsNonConstantTextureOffset;
|
||||||
public readonly bool SupportsShaderBallot;
|
public readonly bool SupportsShaderBallot;
|
||||||
|
public readonly bool SupportsShaderBarrierDivergence;
|
||||||
public readonly bool SupportsShaderFloat64;
|
public readonly bool SupportsShaderFloat64;
|
||||||
public readonly bool SupportsTextureShadowLod;
|
public readonly bool SupportsTextureShadowLod;
|
||||||
public readonly bool SupportsViewportIndexVertexTessellation;
|
public readonly bool SupportsViewportIndexVertexTessellation;
|
||||||
|
@ -82,6 +83,7 @@ namespace Ryujinx.Graphics.GAL
|
||||||
bool supportsCubemapView,
|
bool supportsCubemapView,
|
||||||
bool supportsNonConstantTextureOffset,
|
bool supportsNonConstantTextureOffset,
|
||||||
bool supportsShaderBallot,
|
bool supportsShaderBallot,
|
||||||
|
bool supportsShaderBarrierDivergence,
|
||||||
bool supportsShaderFloat64,
|
bool supportsShaderFloat64,
|
||||||
bool supportsTextureShadowLod,
|
bool supportsTextureShadowLod,
|
||||||
bool supportsViewportIndexVertexTessellation,
|
bool supportsViewportIndexVertexTessellation,
|
||||||
|
@ -126,6 +128,7 @@ namespace Ryujinx.Graphics.GAL
|
||||||
SupportsCubemapView = supportsCubemapView;
|
SupportsCubemapView = supportsCubemapView;
|
||||||
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
|
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
|
||||||
SupportsShaderBallot = supportsShaderBallot;
|
SupportsShaderBallot = supportsShaderBallot;
|
||||||
|
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
|
||||||
SupportsShaderFloat64 = supportsShaderFloat64;
|
SupportsShaderFloat64 = supportsShaderFloat64;
|
||||||
SupportsTextureShadowLod = supportsTextureShadowLod;
|
SupportsTextureShadowLod = supportsTextureShadowLod;
|
||||||
SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;
|
SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;
|
||||||
|
|
|
@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
|
||||||
private const ushort FileFormatVersionMajor = 1;
|
private const ushort FileFormatVersionMajor = 1;
|
||||||
private const ushort FileFormatVersionMinor = 2;
|
private const ushort FileFormatVersionMinor = 2;
|
||||||
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
|
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
|
||||||
private const uint CodeGenVersion = 5159;
|
private const uint CodeGenVersion = 5044;
|
||||||
|
|
||||||
private const string SharedTocFileName = "shared.toc";
|
private const string SharedTocFileName = "shared.toc";
|
||||||
private const string SharedDataFileName = "shared.data";
|
private const string SharedDataFileName = "shared.data";
|
||||||
|
|
|
@ -141,6 +141,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
|
|
||||||
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
|
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
|
||||||
|
|
||||||
|
public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence;
|
||||||
|
|
||||||
public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64;
|
public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64;
|
||||||
|
|
||||||
public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;
|
public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;
|
||||||
|
|
|
@ -127,6 +127,7 @@ namespace Ryujinx.Graphics.OpenGL
|
||||||
public Capabilities GetCapabilities()
|
public Capabilities GetCapabilities()
|
||||||
{
|
{
|
||||||
bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows;
|
bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows;
|
||||||
|
bool intelUnix = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelUnix;
|
||||||
bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows;
|
bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows;
|
||||||
|
|
||||||
return new Capabilities(
|
return new Capabilities(
|
||||||
|
@ -158,6 +159,7 @@ namespace Ryujinx.Graphics.OpenGL
|
||||||
supportsCubemapView: true,
|
supportsCubemapView: true,
|
||||||
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
|
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
|
||||||
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
|
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
|
||||||
|
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
|
||||||
supportsShaderFloat64: true,
|
supportsShaderFloat64: true,
|
||||||
supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
|
supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
|
||||||
supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
|
supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
|
||||||
|
|
|
@ -28,18 +28,18 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
||||||
|
|
||||||
for (int i = 1; i < info.Functions.Count; i++)
|
for (int i = 1; i < info.Functions.Count; i++)
|
||||||
{
|
{
|
||||||
PrintFunction(context, info, info.Functions[i]);
|
PrintFunction(context, info.Functions[i]);
|
||||||
|
|
||||||
context.AppendLine();
|
context.AppendLine();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PrintFunction(context, info, info.Functions[0], MainFunctionName);
|
PrintFunction(context, info.Functions[0], MainFunctionName);
|
||||||
|
|
||||||
return context.GetCode();
|
return context.GetCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void PrintFunction(CodeGenContext context, StructuredProgramInfo info, StructuredFunction function, string funcName = null)
|
private static void PrintFunction(CodeGenContext context, StructuredFunction function, string funcName = null)
|
||||||
{
|
{
|
||||||
context.CurrentFunction = function;
|
context.CurrentFunction = function;
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
||||||
|
|
||||||
Declarations.DeclareLocals(context, function);
|
Declarations.DeclareLocals(context, function);
|
||||||
|
|
||||||
PrintBlock(context, function.MainBlock);
|
PrintBlock(context, function.MainBlock, funcName == MainFunctionName);
|
||||||
|
|
||||||
context.LeaveScope();
|
context.LeaveScope();
|
||||||
}
|
}
|
||||||
|
@ -72,7 +72,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
||||||
return $"{Declarations.GetVarTypeName(context, function.ReturnType)} {funcName ?? function.Name}({string.Join(", ", args)})";
|
return $"{Declarations.GetVarTypeName(context, function.ReturnType)} {funcName ?? function.Name}({string.Join(", ", args)})";
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void PrintBlock(CodeGenContext context, AstBlock block)
|
private static void PrintBlock(CodeGenContext context, AstBlock block, bool isMainFunction)
|
||||||
{
|
{
|
||||||
AstBlockVisitor visitor = new AstBlockVisitor(block);
|
AstBlockVisitor visitor = new AstBlockVisitor(block);
|
||||||
|
|
||||||
|
@ -112,10 +112,32 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bool supportsBarrierDivergence = context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence();
|
||||||
|
bool mayHaveReturned = false;
|
||||||
|
|
||||||
foreach (IAstNode node in visitor.Visit())
|
foreach (IAstNode node in visitor.Visit())
|
||||||
{
|
{
|
||||||
if (node is AstOperation operation)
|
if (node is AstOperation operation)
|
||||||
{
|
{
|
||||||
|
if (!supportsBarrierDivergence)
|
||||||
|
{
|
||||||
|
if (operation.Inst == IntermediateRepresentation.Instruction.Barrier)
|
||||||
|
{
|
||||||
|
// Barrier on divergent control flow paths may cause the GPU to hang,
|
||||||
|
// so skip emitting the barrier for those cases.
|
||||||
|
if (visitor.Block.Type != AstBlockType.Main || mayHaveReturned || !isMainFunction)
|
||||||
|
{
|
||||||
|
context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed.");
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (operation.Inst == IntermediateRepresentation.Instruction.Return)
|
||||||
|
{
|
||||||
|
mayHaveReturned = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
string expr = InstGen.GetExpression(context, operation);
|
string expr = InstGen.GetExpression(context, operation);
|
||||||
|
|
||||||
if (expr != null)
|
if (expr != null)
|
||||||
|
|
|
@ -76,6 +76,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
||||||
|
|
||||||
public SpirvDelegates Delegates { get; }
|
public SpirvDelegates Delegates { get; }
|
||||||
|
|
||||||
|
public bool IsMainFunction { get; private set; }
|
||||||
|
public bool MayHaveReturned { get; set; }
|
||||||
|
|
||||||
public CodeGenContext(
|
public CodeGenContext(
|
||||||
StructuredProgramInfo info,
|
StructuredProgramInfo info,
|
||||||
ShaderConfig config,
|
ShaderConfig config,
|
||||||
|
@ -108,8 +111,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
||||||
Delegates = new SpirvDelegates(this);
|
Delegates = new SpirvDelegates(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void StartFunction()
|
public void StartFunction(bool isMainFunction)
|
||||||
{
|
{
|
||||||
|
IsMainFunction = isMainFunction;
|
||||||
|
MayHaveReturned = false;
|
||||||
_locals.Clear();
|
_locals.Clear();
|
||||||
_localForArgs.Clear();
|
_localForArgs.Clear();
|
||||||
_funcArgs.Clear();
|
_funcArgs.Clear();
|
||||||
|
|
|
@ -242,6 +242,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
||||||
|
|
||||||
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
|
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
|
||||||
{
|
{
|
||||||
|
// Barrier on divergent control flow paths may cause the GPU to hang,
|
||||||
|
// so skip emitting the barrier for those cases.
|
||||||
|
if (!context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence() &&
|
||||||
|
(context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction))
|
||||||
|
{
|
||||||
|
context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed.");
|
||||||
|
|
||||||
|
return OperationResult.Invalid;
|
||||||
|
}
|
||||||
|
|
||||||
context.ControlBarrier(
|
context.ControlBarrier(
|
||||||
context.Constant(context.TypeU32(), Scope.Workgroup),
|
context.Constant(context.TypeU32(), Scope.Workgroup),
|
||||||
context.Constant(context.TypeU32(), Scope.Workgroup),
|
context.Constant(context.TypeU32(), Scope.Workgroup),
|
||||||
|
@ -1092,6 +1102,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
||||||
|
|
||||||
private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation)
|
private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation)
|
||||||
{
|
{
|
||||||
|
context.MayHaveReturned = true;
|
||||||
|
|
||||||
if (operation.SourcesCount != 0)
|
if (operation.SourcesCount != 0)
|
||||||
{
|
{
|
||||||
context.ReturnValue(context.Get(context.CurrentFunction.ReturnType, operation.GetSource(0)));
|
context.ReturnValue(context.Get(context.CurrentFunction.ReturnType, operation.GetSource(0)));
|
||||||
|
|
|
@ -148,7 +148,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
||||||
|
|
||||||
context.CurrentFunction = function;
|
context.CurrentFunction = function;
|
||||||
context.AddFunction(spvFunc);
|
context.AddFunction(spvFunc);
|
||||||
context.StartFunction();
|
context.StartFunction(isMainFunction: funcIndex == 0);
|
||||||
|
|
||||||
Declarations.DeclareParameters(context, function);
|
Declarations.DeclareParameters(context, function);
|
||||||
|
|
||||||
|
|
|
@ -331,6 +331,15 @@ namespace Ryujinx.Graphics.Shader
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Queries host GPU shader support for barrier instructions on divergent control flow paths.
|
||||||
|
/// </summary>
|
||||||
|
/// <returns>True if the GPU supports barriers on divergent control flow paths, false otherwise</returns>
|
||||||
|
bool QueryHostSupportsShaderBarrierDivergence()
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Queries host GPU support for 64-bit floating point (double precision) operations on the shader.
|
/// Queries host GPU support for 64-bit floating point (double precision) operations on the shader.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|
|
@ -595,6 +595,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||||
supportsCubemapView: !IsAmdGcn,
|
supportsCubemapView: !IsAmdGcn,
|
||||||
supportsNonConstantTextureOffset: false,
|
supportsNonConstantTextureOffset: false,
|
||||||
supportsShaderBallot: false,
|
supportsShaderBallot: false,
|
||||||
|
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
|
||||||
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
|
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
|
||||||
supportsTextureShadowLod: false,
|
supportsTextureShadowLod: false,
|
||||||
supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,
|
supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,
|
||||||
|
|
Loading…
Reference in a new issue