forked from Mirror/Ryujinx
Implement soft float64 conversion on shaders when host has no support (#5159)
* Implement soft float64 conversion on shaders when host has no support
* Shader cache version bump
* Fix rebase error
This commit is contained in:
parent
5813b2e354
commit
fe30c03cac
12 changed files with 222 additions and 4 deletions
|
@ -34,6 +34,7 @@ namespace Ryujinx.Graphics.GAL
|
|||
public readonly bool SupportsCubemapView;
|
||||
public readonly bool SupportsNonConstantTextureOffset;
|
||||
public readonly bool SupportsShaderBallot;
|
||||
public readonly bool SupportsShaderFloat64;
|
||||
public readonly bool SupportsTextureShadowLod;
|
||||
public readonly bool SupportsViewportIndexVertexTessellation;
|
||||
public readonly bool SupportsViewportMask;
|
||||
|
@ -81,6 +82,7 @@ namespace Ryujinx.Graphics.GAL
|
|||
bool supportsCubemapView,
|
||||
bool supportsNonConstantTextureOffset,
|
||||
bool supportsShaderBallot,
|
||||
bool supportsShaderFloat64,
|
||||
bool supportsTextureShadowLod,
|
||||
bool supportsViewportIndexVertexTessellation,
|
||||
bool supportsViewportMask,
|
||||
|
@ -124,6 +126,7 @@ namespace Ryujinx.Graphics.GAL
|
|||
SupportsCubemapView = supportsCubemapView;
|
||||
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
|
||||
SupportsShaderBallot = supportsShaderBallot;
|
||||
SupportsShaderFloat64 = supportsShaderFloat64;
|
||||
SupportsTextureShadowLod = supportsTextureShadowLod;
|
||||
SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;
|
||||
SupportsViewportMask = supportsViewportMask;
|
||||
|
|
|
@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
|
|||
private const ushort FileFormatVersionMajor = 1;
|
||||
private const ushort FileFormatVersionMinor = 2;
|
||||
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
|
||||
private const uint CodeGenVersion = 4992;
|
||||
private const uint CodeGenVersion = 5159;
|
||||
|
||||
private const string SharedTocFileName = "shared.toc";
|
||||
private const string SharedDataFileName = "shared.data";
|
||||
|
|
|
@ -141,6 +141,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
|||
|
||||
/// <summary>
/// Queries host support for shader ballot, as reported by the context capabilities.
/// </summary>
/// <returns>The value of <c>SupportsShaderBallot</c> from the context capabilities</returns>
public bool QueryHostSupportsShaderBallot()
{
    return _context.Capabilities.SupportsShaderBallot;
}
|
||||
|
||||
/// <summary>
/// Queries host support for 64-bit floating point operations on shaders, as reported by the context capabilities.
/// </summary>
/// <returns>The value of <c>SupportsShaderFloat64</c> from the context capabilities</returns>
public bool QueryHostSupportsShaderFloat64()
{
    return _context.Capabilities.SupportsShaderFloat64;
}
|
||||
|
||||
/// <summary>
/// Queries host support for signed normalized buffer texture formats, as reported by the context capabilities.
/// </summary>
/// <returns>The value of <c>SupportsSnormBufferTextureFormat</c> from the context capabilities</returns>
public bool QueryHostSupportsSnormBufferTextureFormat()
{
    return _context.Capabilities.SupportsSnormBufferTextureFormat;
}
|
||||
|
||||
/// <summary>
/// Queries host support for texture shadow LOD, as reported by the context capabilities.
/// </summary>
/// <returns>The value of <c>SupportsTextureShadowLod</c> from the context capabilities</returns>
public bool QueryHostSupportsTextureShadowLod()
{
    return _context.Capabilities.SupportsTextureShadowLod;
}
|
||||
|
|
|
@ -158,6 +158,7 @@ namespace Ryujinx.Graphics.OpenGL
|
|||
supportsCubemapView: true,
|
||||
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
|
||||
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
|
||||
supportsShaderFloat64: true,
|
||||
supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
|
||||
supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
|
||||
supportsViewportMask: HwCapabilities.SupportsViewportArray2,
|
||||
|
|
|
@ -331,6 +331,15 @@ namespace Ryujinx.Graphics.Shader
|
|||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
/// Queries whether the host GPU supports 64-bit floating point (double precision) operations on shaders.
/// </summary>
/// <remarks>
/// This default implementation always reports support.
/// </remarks>
/// <returns>True if the GPU and driver support double operations, false otherwise</returns>
bool QueryHostSupportsShaderFloat64() => true;
|
||||
|
||||
/// <summary>
|
||||
/// Queries host GPU support for signed normalized buffer texture formats.
|
||||
/// </summary>
|
||||
|
|
|
@ -255,5 +255,35 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
|
|||
|
||||
_sources = new Operand[] { source };
|
||||
}
|
||||
|
||||
/// <summary>
/// Rewrites the instruction of this operation so that it works on 32-bit floats instead of 64-bit floats.
/// </summary>
/// <remarks>
/// When the bits outside of <see cref="Instruction.Mask"/> are exactly <see cref="Instruction.FP64"/>,
/// the masked bits are kept and combined with <see cref="Instruction.FP32"/>.
/// Otherwise, conversions between FP32 and FP64 become <see cref="Instruction.Copy"/>, and conversions
/// between FP64 and 32-bit integers become the matching FP32 conversion.
/// Any other instruction is left unchanged.
/// </remarks>
public void TurnDoubleIntoFloat()
{
    Instruction typeBits = Inst & ~Instruction.Mask;

    if (typeBits == Instruction.FP64)
    {
        // Keep the base instruction, only swap the type from FP64 to FP32.
        Inst = (Inst & Instruction.Mask) | Instruction.FP32;

        return;
    }

    switch (Inst)
    {
        case Instruction.ConvertFP64ToS32:
            Inst = Instruction.ConvertFP32ToS32;
            break;
        case Instruction.ConvertFP64ToU32:
            Inst = Instruction.ConvertFP32ToU32;
            break;
        case Instruction.ConvertS32ToFP64:
            Inst = Instruction.ConvertS32ToFP32;
            break;
        case Instruction.ConvertU32ToFP64:
            Inst = Instruction.ConvertU32ToFP32;
            break;
        case Instruction.ConvertFP32ToFP64:
        case Instruction.ConvertFP64ToFP32:
            // Source and destination now have the same type, so the conversion is replaced with a copy.
            Inst = Instruction.Copy;
            break;
    }
}
|
||||
}
|
||||
}
|
|
@ -45,12 +45,101 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
{
|
||||
return functionName switch
|
||||
{
|
||||
HelperFunctionName.ConvertDoubleToFloat => GenerateConvertDoubleToFloatFunction(),
|
||||
HelperFunctionName.ConvertFloatToDouble => GenerateConvertFloatToDoubleFunction(),
|
||||
HelperFunctionName.TexelFetchScale => GenerateTexelFetchScaleFunction(),
|
||||
HelperFunctionName.TextureSizeUnscale => GenerateTextureSizeUnscaleFunction(),
|
||||
_ => throw new ArgumentException($"Invalid function name {functionName}")
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the helper function that converts a 64-bit float, received as two 32-bit words, into a 32-bit float.
/// </summary>
/// <remarks>
/// Argument 0 is the low word and argument 1 is the high word of the double.
/// The conversion is emitted using integer and bitwise operations only.
/// </remarks>
/// <returns>The generated "ConvertDoubleToFloat" function</returns>
private Function GenerateConvertDoubleToFloatFunction()
{
    EmitterContext context = new EmitterContext();

    Operand lowWord = Argument(0);
    Operand highWord = Argument(1);

    // The lowest 22 bits of the low word are dropped from the working mantissa,
    // they only contribute a single "sticky" bit below.
    Operand droppedBits = context.BitwiseAnd(lowWord, Const((1 << 22) - 1));
    Operand mantissa = context.ShiftRightU32(lowWord, Const(22));

    // Place the 20 mantissa bits of the high word above the 10 bits taken from the low word.
    Operand highMantissa = context.BitwiseAnd(highWord, Const(0xfffff));
    Operand highMantissaShifted = context.ShiftLeft(highMantissa, Const(10));
    mantissa = context.BitwiseOr(mantissa, highMantissaShifted);

    Operand stickyBit = context.ConditionalSelect(droppedBits, Const(1), Const(0));
    mantissa = context.BitwiseOr(mantissa, stickyBit);

    Operand shiftedHigh = context.ShiftRightU32(highWord, Const(20));
    Operand exp = context.BitwiseAnd(shiftedHigh, Const(0x7ff));
    Operand sign = context.ShiftRightS32(highWord, Const(31));

    Operand resultSign = context.ShiftLeft(sign, Const(31));

    Operand notZero = context.BitwiseOr(mantissa, exp);

    Operand lblNotZero = Label();

    context.BranchIfTrue(lblNotZero, notZero);

    // Mantissa and exponent are both zero: the result is only the sign bit.
    context.Return(resultSign);

    context.MarkLabel(lblNotZero);

    Operand notNaNOrInf = context.ICompareNotEqual(exp, Const(0x7ff));

    mantissa = context.BitwiseOr(mantissa, Const(0x40000000));

    // NOTE(review): 0x381 is one more than 0x380 (1023 - 127), presumably because the bit set above
    // ends up added into the exponent field by the final IAdd. Confirm before changing.
    exp = context.ISubtract(exp, Const(0x381));

    // Note: Overflow cases are not handled here and might produce incorrect results.

    // Rounding uses the 7 lowest mantissa bits: add 0x40, drop the 7 bits, then mask the lowest
    // bit with ~1 or ~0 depending on whether those 7 bits were exactly 0x40.
    Operand roundBits = context.BitwiseAnd(mantissa, Const(0x7f));
    Operand roundBitsXor64 = context.BitwiseExclusiveOr(roundBits, Const(0x40));

    Operand biasedMantissa = context.IAdd(mantissa, Const(0x40));
    mantissa = context.ShiftRightU32(biasedMantissa, Const(7));

    Operand tieMask = context.ConditionalSelect(roundBitsXor64, Const(~0), Const(~1));
    mantissa = context.BitwiseAnd(mantissa, tieMask);

    exp = context.ConditionalSelect(mantissa, exp, Const(0));
    exp = context.ConditionalSelect(notNaNOrInf, exp, Const(0xff));

    Operand expField = context.ShiftLeft(exp, Const(23));
    Operand magnitude = context.IAdd(mantissa, expField);
    Operand result = context.IAdd(magnitude, resultSign);

    context.Return(result);

    return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ConvertDoubleToFloat", true, 2, 0);
}
|
||||
|
||||
/// <summary>
/// Builds the helper function that converts a 32-bit float into a 64-bit float returned as two 32-bit words.
/// </summary>
/// <remarks>
/// Argument 0 is the float value. The low word of the result is written to argument 1
/// and the high word to argument 2. The conversion is emitted using integer and bitwise operations only.
/// </remarks>
/// <returns>The generated "ConvertFloatToDouble" function</returns>
private Function GenerateConvertFloatToDoubleFunction()
{
    EmitterContext context = new EmitterContext();

    Operand input = Argument(0);

    // Split the float into its 23-bit mantissa, 8-bit exponent and sign.
    Operand mantissa = context.BitwiseAnd(input, Const(0x7fffff));
    Operand shiftedInput = context.ShiftRightU32(input, Const(23));
    Operand exp = context.BitwiseAnd(shiftedInput, Const(0xff));
    Operand sign = context.ShiftRightS32(input, Const(31));

    Operand notNaNOrInf = context.ICompareNotEqual(exp, Const(0xff));
    Operand expNotZero = context.ICompareNotEqual(exp, Const(0));
    Operand mantissaIsZero = context.ICompareEqual(mantissa, Const(0));
    Operand notDenorm = context.BitwiseOr(expNotZero, mantissaIsZero);

    // 0x380 = 1023 - 127, the difference between the double and float exponent biases.
    exp = context.IAdd(exp, Const(0x380));

    // Values used when the input is a denormal: the mantissa is shifted left
    // by a distance derived from the position of its most significant set bit.
    Operand mantissaMsb = context.FindMSBU32(mantissa);
    Operand shiftDist = context.ISubtract(Const(32), mantissaMsb);
    Operand oneMinusShift = context.ISubtract(Const(1), shiftDist);
    Operand normExp = context.ISubtract(oneMinusShift, Const(1));
    Operand normMant = context.ShiftLeft(mantissa, shiftDist);

    exp = context.ConditionalSelect(notNaNOrInf, exp, Const(0x7ff));
    exp = context.ConditionalSelect(notDenorm, exp, normExp);
    mantissa = context.ConditionalSelect(expNotZero, mantissa, normMant);

    // The low 3 mantissa bits go to the top of the low word, the remaining bits go to the high word.
    Operand resultLow = context.ShiftLeft(mantissa, Const(29));
    Operand resultHigh = context.ShiftRightU32(mantissa, Const(3));

    Operand expField = context.ShiftLeft(exp, Const(20));
    resultHigh = context.IAdd(resultHigh, expField);

    Operand signField = context.ShiftLeft(sign, Const(31));
    resultHigh = context.IAdd(resultHigh, signField);

    context.Copy(Argument(1), resultLow);
    context.Copy(Argument(2), resultHigh);
    context.Return();

    return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ConvertFloatToDouble", false, 1, 2);
}
|
||||
|
||||
private Function GenerateTexelFetchScaleFunction()
|
||||
{
|
||||
EmitterContext context = new EmitterContext();
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Identifies a helper function.
/// </summary>
enum HelperFunctionName
{
    /// <summary>
    /// Helper for converting a double into a float.
    /// </summary>
    ConvertDoubleToFloat,

    /// <summary>
    /// Helper for converting a float into a double.
    /// </summary>
    ConvertFloatToDouble,

    /// <summary>
    /// Texel fetch scale helper.
    /// </summary>
    TexelFetchScale,

    /// <summary>
    /// Texture size unscale helper.
    /// </summary>
    TextureSizeUnscale,
}
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
/// <summary>
/// Pass that replaces 64-bit float operations with 32-bit float operations,
/// calling helper functions for the pack and unpack instructions.
/// </summary>
static class DoubleToFloat
{
    /// <summary>
    /// Runs the pass over all the operations of a basic block.
    /// </summary>
    /// <param name="hfm">Helper function manager used to get the conversion helper function IDs</param>
    /// <param name="block">Basic block whose operations are rewritten in place</param>
    public static void RunPass(HelperFunctionManager hfm, BasicBlock block)
    {
        LinkedListNode<INode> current = block.Operations.First;

        while (current != null)
        {
            // Nodes that are not operations are stepped over untouched.
            if (current.Value is Operation)
            {
                // The node might have been replaced, so iteration continues from the returned one.
                current = InsertSoftFloat64(hfm, current);
            }

            current = current.Next;
        }
    }

    /// <summary>
    /// Rewrites a single operation so that it no longer needs 64-bit floats.
    /// </summary>
    /// <param name="hfm">Helper function manager used to get the conversion helper function IDs</param>
    /// <param name="node">List node holding the operation to rewrite</param>
    /// <returns>The node of the inserted call when the operation was replaced, otherwise <paramref name="node"/></returns>
    private static LinkedListNode<INode> InsertSoftFloat64(HelperFunctionManager hfm, LinkedListNode<INode> node)
    {
        Operation op = (Operation)node.Value;

        switch (op.Inst)
        {
            case Instruction.PackDouble2x32:
            {
                int helperId = hfm.GetOrCreateFunctionId(HelperFunctionName.ConvertDoubleToFloat);

                Operand[] args = new Operand[] { Const(helperId), op.GetSource(0), op.GetSource(1) };

                // The call takes over the destination of the pack operation.
                Operand packedDest = op.Dest;

                op.Dest = null;

                Operation call = new Operation(Instruction.Call, 0, packedDest, args);
                LinkedListNode<INode> callNode = node.List.AddBefore(node, call);

                Utils.DeleteNode(node, op);

                return callNode;
            }

            case Instruction.UnpackDouble2x32:
            {
                int helperId = hfm.GetOrCreateFunctionId(HelperFunctionName.ConvertFloatToDouble);

                // TODO: Allow UnpackDouble2x32 to produce two outputs and get rid of "operation.Index".

                // Only the word selected by the operation index goes to the original destination,
                // the other word goes to a new local.
                Operand lowDest = op.Index == 0 ? op.Dest : Local();
                Operand highDest = op.Index == 1 ? op.Dest : Local();

                op.Dest = null;

                Operand[] args = new Operand[] { Const(helperId), op.GetSource(0), lowDest, highDest };

                Operation call = new Operation(Instruction.Call, 0, (Operand)null, args);
                LinkedListNode<INode> callNode = node.List.AddBefore(node, call);

                Utils.DeleteNode(node, op);

                return callNode;
            }

            default:
                op.TurnDoubleIntoFloat();

                return node;
        }
    }
}
|
||||
}
|
|
@ -11,8 +11,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
{
|
||||
RunOptimizationPasses(blocks, config);
|
||||
|
||||
// TODO: Some of those are not optimizations and shouldn't be here.
|
||||
|
||||
GlobalToStorage.RunPass(hfm, blocks, config);
|
||||
|
||||
bool hostSupportsShaderFloat64 = config.GpuAccessor.QueryHostSupportsShaderFloat64();
|
||||
|
||||
// Those passes are looking for specific patterns and only need to run once.
|
||||
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
|
||||
{
|
||||
|
@ -24,6 +28,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
{
|
||||
EliminateMultiplyByFragmentCoordW(blocks[blkIndex]);
|
||||
}
|
||||
|
||||
// If the host does not support double operations, we need to turn them into float operations.
|
||||
if (!hostSupportsShaderFloat64)
|
||||
{
|
||||
DoubleToFloat.RunPass(hfm, blocks[blkIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
// Run optimizations one last time to remove any code that is now optimizable after above passes.
|
||||
|
|
|
@ -26,6 +26,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
public readonly bool SupportsFragmentShaderInterlock;
|
||||
public readonly bool SupportsGeometryShaderPassthrough;
|
||||
public readonly bool SupportsSubgroupSizeControl;
|
||||
public readonly bool SupportsShaderFloat64;
|
||||
public readonly bool SupportsShaderInt8;
|
||||
public readonly bool SupportsShaderStencilExport;
|
||||
public readonly bool SupportsShaderStorageImageMultisample;
|
||||
|
@ -63,6 +64,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
bool supportsFragmentShaderInterlock,
|
||||
bool supportsGeometryShaderPassthrough,
|
||||
bool supportsSubgroupSizeControl,
|
||||
bool supportsShaderFloat64,
|
||||
bool supportsShaderInt8,
|
||||
bool supportsShaderStencilExport,
|
||||
bool supportsShaderStorageImageMultisample,
|
||||
|
@ -99,6 +101,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
SupportsFragmentShaderInterlock = supportsFragmentShaderInterlock;
|
||||
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
|
||||
SupportsSubgroupSizeControl = supportsSubgroupSizeControl;
|
||||
SupportsShaderFloat64 = supportsShaderFloat64;
|
||||
SupportsShaderInt8 = supportsShaderInt8;
|
||||
SupportsShaderStencilExport = supportsShaderStencilExport;
|
||||
SupportsShaderStorageImageMultisample = supportsShaderStorageImageMultisample;
|
||||
|
|
|
@ -306,6 +306,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_fragment_shader_interlock"),
|
||||
_physicalDevice.IsDeviceExtensionPresent("VK_NV_geometry_shader_passthrough"),
|
||||
supportsSubgroupSizeControl,
|
||||
features2.Features.ShaderFloat64,
|
||||
featuresShaderInt8.ShaderInt8,
|
||||
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_shader_stencil_export"),
|
||||
features2.Features.ShaderStorageImageMultisample,
|
||||
|
@ -594,6 +595,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
supportsCubemapView: !IsAmdGcn,
|
||||
supportsNonConstantTextureOffset: false,
|
||||
supportsShaderBallot: false,
|
||||
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
|
||||
supportsTextureShadowLod: false,
|
||||
supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,
|
||||
supportsViewportMask: Capabilities.SupportsViewportArray2,
|
||||
|
|
Reference in a new issue