Archived
1
0
Fork 0
forked from Mirror/Ryujinx

Implement shader LEA instruction and improve bindless image load/store (#1355)

This commit is contained in:
gdkchan 2020-07-03 20:48:44 -03:00 committed by GitHub
parent 76e5af967a
commit e13154c83d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 82 additions and 43 deletions

View file

@ -4,12 +4,12 @@ namespace Ryujinx.Graphics.Shader.Decoders
{
public static bool Extract(this int value, int lsb)
{
return ((int)(value >> lsb) & 1) != 0;
return ((value >> lsb) & 1) != 0;
}
public static int Extract(this int value, int lsb, int length)
{
return (int)(value >> lsb) & (int)(uint.MaxValue >> (32 - length));
return (value >> lsb) & (int)(uint.MaxValue >> (32 - length));
}
public static bool Extract(this long value, int lsb)

View file

@ -176,6 +176,9 @@ namespace Ryujinx.Graphics.Shader.Decoders
Set("1110111110010x", InstEmit.Ldc, typeof(OpCodeLdc));
Set("1110111011010x", InstEmit.Ldg, typeof(OpCodeMemory));
Set("1110111101001x", InstEmit.Lds, typeof(OpCodeMemory));
Set("010010111101xx", InstEmit.Lea, typeof(OpCodeAluCbuf));
Set("0011011x11010x", InstEmit.Lea, typeof(OpCodeAluImm));
Set("0101101111010x", InstEmit.Lea, typeof(OpCodeAluReg));
Set("0100110001000x", InstEmit.Lop, typeof(OpCodeLopCbuf));
Set("0011100001000x", InstEmit.Lop, typeof(OpCodeLopImm));
Set("000001xxxxxxxx", InstEmit.Lop, typeof(OpCodeLopImm32));

View file

@ -384,6 +384,27 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(Register(op.Predicate0), p1Res);
}
public static void Lea(EmitterContext context)
{
OpCodeAlu op = (OpCodeAlu)context.CurrOp;
bool negateA = op.RawOpCode.Extract(45);
int shift = op.RawOpCode.Extract(39, 5);
Operand srcA = GetSrcA(context);
Operand srcB = GetSrcB(context);
srcA = context.ShiftLeft(srcA, Const(shift));
srcA = context.INegate(srcA, negateA);
Operand res = context.IAdd(srcA, srcB);
context.Copy(GetDest(context), res);
// TODO: CC, X
}
public static void Lop(EmitterContext context)
{
IOpCodeLop op = (IOpCodeLop)context.CurrOp;

View file

@ -99,7 +99,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
if (!op.IsBindless)
{
operation.Format = GetTextureFormat(context, handle);
operation.Format = context.Config.GetTextureFormat(handle);
}
context.Add(operation);
@ -228,7 +228,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
if (!op.IsBindless)
{
format = GetTextureFormat(context, op.Immediate);
format = context.Config.GetTextureFormat(op.Immediate);
}
}
else
@ -1223,27 +1223,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
};
}
private static TextureFormat GetTextureFormat(EmitterContext context, int handle)
{
// When the formatted load extension is supported, we don't need to
// specify a format, we can just declare it without a format and the GPU will handle it.
if (context.Config.GpuAccessor.QuerySupportsImageLoadFormatted())
{
return TextureFormat.Unknown;
}
var format = context.Config.GpuAccessor.QueryTextureFormat(handle);
if (format == TextureFormat.Unknown)
{
context.Config.GpuAccessor.Log($"Unknown format for texture {handle}.");
format = TextureFormat.R8G8B8A8Unorm;
}
return format;
}
private static TextureFormat GetTextureFormat(IntegerSize size)
{
return size switch

View file

@ -5,7 +5,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
class BindlessElimination
{
public static void RunPass(BasicBlock block)
private const int NvnTextureBufferSlot = 2;
public static void RunPass(BasicBlock block, ShaderConfig config)
{
// We can turn a bindless into regular access by recognizing the pattern
// produced by the compiler for separate texture and sampler.
@ -24,26 +26,39 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
continue;
}
if (!(texOp.GetSource(0).AsgOp is Operation handleCombineOp))
if (texOp.Inst == Instruction.TextureSample)
{
continue;
}
if (!(texOp.GetSource(0).AsgOp is Operation handleCombineOp))
{
continue;
}
if (handleCombineOp.Inst != Instruction.BitwiseOr)
if (handleCombineOp.Inst != Instruction.BitwiseOr)
{
continue;
}
Operand src0 = handleCombineOp.GetSource(0);
Operand src1 = handleCombineOp.GetSource(1);
if (src0.Type != OperandType.ConstantBuffer || src0.GetCbufSlot() != NvnTextureBufferSlot ||
src1.Type != OperandType.ConstantBuffer || src1.GetCbufSlot() != NvnTextureBufferSlot)
{
continue;
}
texOp.SetHandle(src0.GetCbufOffset() | (src1.GetCbufOffset() << 16));
}
else if (texOp.Inst == Instruction.ImageLoad || texOp.Inst == Instruction.ImageStore)
{
continue;
Operand src0 = texOp.GetSource(0);
if (src0.Type == OperandType.ConstantBuffer && src0.GetCbufSlot() == NvnTextureBufferSlot)
{
texOp.SetHandle(src0.GetCbufOffset());
texOp.Format = config.GetTextureFormat(texOp.Handle);
}
}
Operand src0 = handleCombineOp.GetSource(0);
Operand src1 = handleCombineOp.GetSource(1);
if (src0.Type != OperandType.ConstantBuffer || src0.GetCbufSlot() != 2 ||
src1.Type != OperandType.ConstantBuffer || src1.GetCbufSlot() != 2)
{
continue;
}
texOp.SetHandle(src0.GetCbufOffset() | (src1.GetCbufOffset() << 16));
}
}
}

View file

@ -89,7 +89,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
{
BindlessToIndexed.RunPass(blocks[blkIndex]);
BindlessElimination.RunPass(blocks[blkIndex]);
BindlessElimination.RunPass(blocks[blkIndex], config);
// Try to eliminate any operations that are now unused.
LinkedListNode<INode> node = blocks[blkIndex].Operations.First;

View file

@ -68,5 +68,26 @@ namespace Ryujinx.Graphics.Shader.Translation
// The depth register is always two registers after the last color output.
return count + 1;
}
public TextureFormat GetTextureFormat(int handle)
{
// When the formatted load extension is supported, we don't need to
// specify a format, we can just declare it without a format and the GPU will handle it.
if (GpuAccessor.QuerySupportsImageLoadFormatted())
{
return TextureFormat.Unknown;
}
var format = GpuAccessor.QueryTextureFormat(handle);
if (format == TextureFormat.Unknown)
{
GpuAccessor.Log($"Unknown format for texture {handle}.");
format = TextureFormat.R8G8B8A8Unorm;
}
return format;
}
}
}