From 25fd4ef10e610ee470b76d6f58b4a3b9cd053844 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Sun, 17 Oct 2021 17:28:18 -0300
Subject: [PATCH] Extend bindless elimination to work with masked and shifted
 handles (#2727)

* Extend bindless elimination to work with masked handles

* Extend bindless elimination to catch shifted pattern, refactor handle packing/unpacking
---
 .../Image/TextureBindingsManager.cs           | 54 ++++-------
 Ryujinx.Graphics.Shader/TextureHandle.cs      | 54 +++++++++++
 .../Optimizations/BindlessElimination.cs      | 91 ++++++++++++++++++-
 3 files changed, 159 insertions(+), 40 deletions(-)
 create mode 100644 Ryujinx.Graphics.Shader/TextureHandle.cs

diff --git a/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs b/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs
index dff32dc665..5862ea712e 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs
@@ -14,12 +14,6 @@ namespace Ryujinx.Graphics.Gpu.Image
         private const int InitialTextureStateSize = 32;
         private const int InitialImageStateSize = 8;
 
-        private const int HandleHigh = 16;
-        private const int HandleMask = (1 << HandleHigh) - 1;
-
-        private const int SlotHigh = 16;
-        private const int SlotMask = (1 << SlotHigh) - 1;
-
         private readonly GpuContext _context;
 
         private readonly bool _isCompute;
@@ -348,19 +342,7 @@ namespace Ryujinx.Graphics.Gpu.Image
             {
                 TextureBindingInfo bindingInfo = _textureBindings[stageIndex][index];
 
-                int textureBufferIndex;
-                int samplerBufferIndex;
-
-                if (bindingInfo.CbufSlot < 0)
-                {
-                    textureBufferIndex = _textureBufferIndex;
-                    samplerBufferIndex = textureBufferIndex;
-                }
-                else
-                {
-                    textureBufferIndex = bindingInfo.CbufSlot & SlotMask;
-                    samplerBufferIndex = ((bindingInfo.CbufSlot >> SlotHigh) != 0) ? (bindingInfo.CbufSlot >> SlotHigh) - 1 : textureBufferIndex;
-                }
+                (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(bindingInfo.CbufSlot, _textureBufferIndex);
 
                 int packedId = ReadPackedId(stageIndex, bindingInfo.Handle, textureBufferIndex, samplerBufferIndex);
                 int textureId = UnpackTextureId(packedId);
@@ -440,19 +422,7 @@ namespace Ryujinx.Graphics.Gpu.Image
             {
                 TextureBindingInfo bindingInfo = _imageBindings[stageIndex][index];
 
-                int textureBufferIndex;
-                int samplerBufferIndex;
-
-                if (bindingInfo.CbufSlot < 0)
-                {
-                    textureBufferIndex = _textureBufferIndex;
-                    samplerBufferIndex = textureBufferIndex;
-                }
-                else
-                {
-                    textureBufferIndex = bindingInfo.CbufSlot & SlotMask;
-                    samplerBufferIndex = ((bindingInfo.CbufSlot >> SlotHigh) != 0) ? (bindingInfo.CbufSlot >> SlotHigh) - 1 : textureBufferIndex;
-                }
+                (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(bindingInfo.CbufSlot, _textureBufferIndex);
 
                 int packedId = ReadPackedId(stageIndex, bindingInfo.Handle, textureBufferIndex, samplerBufferIndex);
                 int textureId = UnpackTextureId(packedId);
@@ -522,8 +492,9 @@ namespace Ryujinx.Graphics.Gpu.Image
             int handle,
             int cbufSlot)
         {
-            int textureBufferIndex = cbufSlot < 0 ? bufferIndex : cbufSlot & SlotMask;
-            int packedId = ReadPackedId(stageIndex, handle, textureBufferIndex, textureBufferIndex);
+            (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(cbufSlot, bufferIndex);
+
+            int packedId = ReadPackedId(stageIndex, handle, textureBufferIndex, samplerBufferIndex);
             int textureId = UnpackTextureId(packedId);
 
             ulong poolAddress = _channel.MemoryManager.Translate(poolGpuVa);
@@ -544,11 +515,13 @@ namespace Ryujinx.Graphics.Gpu.Image
         /// <returns>The packed texture and sampler ID (the real texture handle)</returns>
         private int ReadPackedId(int stageIndex, int wordOffset, int textureBufferIndex, int samplerBufferIndex)
         {
+            (int textureWordOffset, int samplerWordOffset, TextureHandleType handleType) = TextureHandle.UnpackOffsets(wordOffset);
+
             ulong textureBufferAddress = _isCompute
                 ? _channel.BufferManager.GetComputeUniformBufferAddress(textureBufferIndex)
                 : _channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, textureBufferIndex);
 
-            int handle = _channel.MemoryManager.Physical.Read<int>(textureBufferAddress + (ulong)(wordOffset & HandleMask) * 4);
+            int handle = _channel.MemoryManager.Physical.Read<int>(textureBufferAddress + (uint)textureWordOffset * 4);
 
             // The "wordOffset" (which is really the immediate value used on texture instructions on the shader)
             // is a 13-bit value. However, in order to also support separate samplers and textures (which uses
@@ -556,13 +529,20 @@ namespace Ryujinx.Graphics.Gpu.Image
             // another offset for the sampler.
             // The shader translator has code to detect separate texture and sampler uses with a bindless texture,
             // turn that into a regular texture access and produce those special handles with values on the higher 16 bits.
-            if (wordOffset >> HandleHigh != 0)
+            if (handleType != TextureHandleType.CombinedSampler)
             {
                 ulong samplerBufferAddress = _isCompute
                     ? _channel.BufferManager.GetComputeUniformBufferAddress(samplerBufferIndex)
                     : _channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, samplerBufferIndex);
 
-                handle |= _channel.MemoryManager.Physical.Read<int>(samplerBufferAddress + (ulong)((wordOffset >> HandleHigh) - 1) * 4);
+                int samplerHandle = _channel.MemoryManager.Physical.Read<int>(samplerBufferAddress + (uint)samplerWordOffset * 4);
+
+                if (handleType == TextureHandleType.SeparateSamplerId)
+                {
+                    samplerHandle <<= 20;
+                }
+
+                handle |= samplerHandle;
             }
 
             return handle;
diff --git a/Ryujinx.Graphics.Shader/TextureHandle.cs b/Ryujinx.Graphics.Shader/TextureHandle.cs
new file mode 100644
index 0000000000..b3712e6bf2
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/TextureHandle.cs
@@ -0,0 +1,54 @@
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Shader
+{
+    public enum TextureHandleType
+    {
+        CombinedSampler = 0, // Must be 0.
+        SeparateSamplerHandle = 1,
+        SeparateSamplerId = 2
+    }
+
+    public static class TextureHandle
+    {
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int PackSlots(int cbufSlot0, int cbufSlot1)
+        {
+            return cbufSlot0 | ((cbufSlot1 + 1) << 16);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (int, int) UnpackSlots(int slots, int defaultTextureBufferIndex)
+        {
+            int textureBufferIndex;
+            int samplerBufferIndex;
+
+            if (slots < 0)
+            {
+                textureBufferIndex = defaultTextureBufferIndex;
+                samplerBufferIndex = textureBufferIndex;
+            }
+            else
+            {
+                uint high = (uint)slots >> 16;
+
+                textureBufferIndex = (ushort)slots;
+                samplerBufferIndex = high != 0 ? (int)high - 1 : textureBufferIndex;
+            }
+
+            return (textureBufferIndex, samplerBufferIndex);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int PackOffsets(int cbufOffset0, int cbufOffset1, TextureHandleType type)
+        {
+            return cbufOffset0 | (cbufOffset1 << 14) | ((int)type << 28);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (int, int, TextureHandleType) UnpackOffsets(int handle)
+        {
+            return (handle & 0x3fff, (handle >> 14) & 0x3fff, (TextureHandleType)((uint)handle >> 28));
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs
index e2f2b752a7..a76df6a172 100644
--- a/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs
@@ -51,6 +51,60 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
                     Operand src0 = Utils.FindLastOperation(handleCombineOp.GetSource(0), block);
                     Operand src1 = Utils.FindLastOperation(handleCombineOp.GetSource(1), block);
 
+                    TextureHandleType handleType = TextureHandleType.SeparateSamplerHandle;
+
+                    // Try to match masked pattern:
+                    // - samplerHandle = samplerHandle & 0xFFF00000;
+                    // - textureHandle = textureHandle & 0xFFFFF;
+                    // - combinedHandle = samplerHandle | textureHandle;
+                    // where samplerHandle and textureHandle come from a constant buffer, and shifted pattern:
+                    // - samplerHandle = samplerId << 20;
+                    // - combinedHandle = samplerHandle | textureHandle;
+                    // where samplerId and textureHandle come from a constant buffer.
+                    if (src0.AsgOp is Operation src0AsgOp)
+                    {
+                        if (src1.AsgOp is Operation src1AsgOp &&
+                            src0AsgOp.Inst == Instruction.BitwiseAnd &&
+                            src1AsgOp.Inst == Instruction.BitwiseAnd)
+                        {
+                            src0 = GetSourceForMaskedHandle(src0AsgOp, 0xFFFFF);
+                            src1 = GetSourceForMaskedHandle(src1AsgOp, 0xFFF00000);
+
+                            // The OR operation is commutative, so we can also try to swap the operands to get a match.
+                            if (src0 == null || src1 == null)
+                            {
+                                src0 = GetSourceForMaskedHandle(src1AsgOp, 0xFFFFF);
+                                src1 = GetSourceForMaskedHandle(src0AsgOp, 0xFFF00000);
+                            }
+
+                            if (src0 == null || src1 == null)
+                            {
+                                continue;
+                            }
+                        }
+                        else if (src0AsgOp.Inst == Instruction.ShiftLeft)
+                        {
+                            Operand shift = src0AsgOp.GetSource(1);
+
+                            if (shift.Type == OperandType.Constant && shift.Value == 20)
+                            {
+                                src0 = src1;
+                                src1 = src0AsgOp.GetSource(0);
+                                handleType = TextureHandleType.SeparateSamplerId;
+                            }
+                        }
+                    }
+                    else if (src1.AsgOp is Operation src1AsgOp && src1AsgOp.Inst == Instruction.ShiftLeft)
+                    {
+                        Operand shift = src1AsgOp.GetSource(1);
+
+                        if (shift.Type == OperandType.Constant && shift.Value == 20)
+                        {
+                            src1 = src1AsgOp.GetSource(0);
+                            handleType = TextureHandleType.SeparateSamplerId;
+                        }
+                    }
+
                     if (src0.Type != OperandType.ConstantBuffer || src1.Type != OperandType.ConstantBuffer)
                     {
                         continue;
@@ -59,8 +113,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
                     SetHandle(
                         config,
                         texOp,
-                        src0.GetCbufOffset() | ((src1.GetCbufOffset() + 1) << 16),
-                        src0.GetCbufSlot() | ((src1.GetCbufSlot() + 1) << 16),
+                        TextureHandle.PackOffsets(src0.GetCbufOffset(), src1.GetCbufOffset(), handleType),
+                        TextureHandle.PackSlots(src0.GetCbufSlot(), src1.GetCbufSlot()),
                         rewriteSamplerType);
                 }
                 else if (texOp.Inst == Instruction.ImageLoad ||
@@ -89,10 +143,41 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
             }
         }
 
+        private static Operand GetSourceForMaskedHandle(Operation asgOp, uint mask)
+        {
+            // Assume it was already checked that the operation is bitwise AND.
+            Operand src0 = asgOp.GetSource(0);
+            Operand src1 = asgOp.GetSource(1);
+
+            if (src0.Type == OperandType.ConstantBuffer && src1.Type == OperandType.ConstantBuffer)
+            {
+                // We can't check if the mask matches here as both operands are from a constant buffer.
+                // Be optimistic and assume it matches. Avoid constant buffer 1 as official drivers
+                // use this one to store compiler constants.
+                return src0.GetCbufSlot() == 1 ? src1 : src0;
+            }
+            else if (src0.Type == OperandType.ConstantBuffer && src1.Type == OperandType.Constant)
+            {
+                if ((uint)src1.Value == mask)
+                {
+                    return src0;
+                }
+            }
+            else if (src0.Type == OperandType.Constant && src1.Type == OperandType.ConstantBuffer)
+            {
+                if ((uint)src0.Value == mask)
+                {
+                    return src1;
+                }
+            }
+
+            return null;
+        }
+
         private static void SetHandle(ShaderConfig config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType)
         {
             texOp.SetHandle(cbufOffset, cbufSlot);
-            
+
             if (rewriteSamplerType)
             {
                 texOp.Type = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);