From 21c9ac6240a3db3300143d1d0dd4a1070d4f576f Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Sat, 3 Jun 2023 20:12:18 -0300
Subject: [PATCH] Implement shader storage buffer operations using new
 Load/Store instructions (#4993)

* Implement storage buffer operations using new Load/Store instruction

* Extend GenerateMultiTargetStorageOp to also match access with constant offset, and add log and comments

* Remove now unused code

* Catch more complex cases of global memory usage

* Shader cache version bump

* Extend global access elimination to work with more shared memory cases

* Change alignment requirement from 16 bytes to 8 bytes, handle cases where we need more than 16 storage buffers

* Tweak preferencing to catch more cases

* Enable CB0 elimination even when host storage buffer alignment is > 16 (for Intel)

* Fix storage buffer bindings

* Simplify some code

* Shader cache version bump

* Fix typo

* Extend global memory elimination to handle shared memory with multiple possible offsets and local memory
---
 src/Ryujinx.Graphics.Gpu/Constants.cs         |    5 -
 .../Engine/Compute/ComputeClass.cs            |   24 -
 .../Memory/BufferManager.cs                   |    2 +-
 .../Shader/DiskCache/DiskCacheHostStorage.cs  |    2 +-
 .../Shader/ShaderInfoBuilder.cs               |   15 +-
 .../CodeGen/Glsl/Declarations.cs              |   60 +-
 .../CodeGen/Glsl/DefaultNames.cs              |    6 -
 .../AtomicMinMaxS32Storage.glsl               |   21 -
 .../HelperFunctions/StoreStorageSmallInt.glsl |   23 -
 .../CodeGen/Glsl/Instructions/InstGen.cs      |   56 +-
 .../Glsl/Instructions/InstGenHelper.cs        |    4 -
 .../Glsl/Instructions/InstGenMemory.cs        |   89 +-
 .../CodeGen/Glsl/OperandManager.cs            |    5 +-
 .../CodeGen/Spirv/CodeGenContext.cs           |   11 +-
 .../CodeGen/Spirv/Declarations.cs             |   76 +-
 .../CodeGen/Spirv/Instructions.cs             |  119 +-
 src/Ryujinx.Graphics.Shader/Constants.cs      |    2 -
 src/Ryujinx.Graphics.Shader/IGpuAccessor.cs   |    2 +-
 .../Instructions/InstEmitFlowControl.cs       |    2 +
 .../Instructions/InstEmitMemory.cs            |   46 +-
 .../IntermediateRepresentation/Instruction.cs |    8 -
 .../IntermediateRepresentation/StorageKind.cs |    7 +-
 .../Ryujinx.Graphics.Shader.csproj            |    2 -
 .../StructuredIr/HelperFunctionsMask.cs       |    2 -
 .../StructuredIr/InstructionInfo.cs           |    8 +-
 .../StructuredIr/ShaderProperties.cs          |    8 +
 .../StructuredIr/StructuredProgram.cs         |    8 -
 .../Translation/EmitterContextInsts.cs        |   86 +-
 .../Translation/GlobalMemory.cs               |   54 -
 .../Translation/HelperFunctionManager.cs      |   11 +-
 .../Optimizations/GlobalToStorage.cs          | 1361 ++++++++++++-----
 .../Translation/Optimizations/Optimizer.cs    |    8 +-
 .../Optimizations/Simplification.cs           |   68 +-
 .../Translation/Optimizations/Utils.cs        |   13 +
 .../Translation/ResourceManager.cs            |  104 ++
 .../Translation/Rewriter.cs                   |  205 +--
 .../Translation/ShaderConfig.cs               |  151 +-
 .../Translation/ShaderIdentifier.cs           |   14 +-
 .../Translation/Translator.cs                 |    2 +-
 src/Ryujinx.Graphics.Vulkan/BufferManager.cs  |    4 +-
 .../DescriptorSetCollection.cs                |   24 -
 .../DescriptorSetUpdater.cs                   |    9 +-
 42 files changed, 1468 insertions(+), 1259 deletions(-)
 delete mode 100644 src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl
 delete mode 100644 src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl
 delete mode 100644 src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs

diff --git a/src/Ryujinx.Graphics.Gpu/Constants.cs b/src/Ryujinx.Graphics.Gpu/Constants.cs
index b559edc25b..ff90e61ba1 100644
--- a/src/Ryujinx.Graphics.Gpu/Constants.cs
+++ b/src/Ryujinx.Graphics.Gpu/Constants.cs
@@ -80,11 +80,6 @@ namespace Ryujinx.Graphics.Gpu
         /// </summary>
         public const int GobAlignment = 64;
 
-        /// <summary>
-        /// Expected byte alignment for storage buffers
-        /// </summary>
-        public const int StorageAlignment = 16;
-
         /// <summary>
         /// Number of the uniform buffer reserved by the driver to store the storage buffer base addresses.
         /// </summary>
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
index 998ece2247..8227a7ff18 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
@@ -187,30 +187,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
                 info = cs.Shaders[0].Info;
             }
 
-            for (int index = 0; index < info.CBuffers.Count; index++)
-            {
-                BufferDescriptor cb = info.CBuffers[index];
-
-                // NVN uses the "hardware" constant buffer for anything that is less than 8,
-                // and those are already bound above.
-                // Anything greater than or equal to 8 uses the emulated constant buffers.
-                // They are emulated using global memory loads.
-                if (cb.Slot < 8)
-                {
-                    continue;
-                }
-
-                ulong cbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
-
-                int cbDescOffset = 0x260 + (cb.Slot - 8) * 0x10;
-
-                cbDescAddress += (ulong)cbDescOffset;
-
-                SbDescriptor cbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(cbDescAddress);
-
-                _channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
-            }
-
             _channel.BufferManager.SetComputeBufferBindings(cs.Bindings);
 
             _channel.TextureManager.SetComputeBindings(cs.Bindings);
diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
index e20e1bb681..48cb33b4d6 100644
--- a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
+++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
@@ -222,7 +222,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// <param name="gpuVa">Start GPU virtual address of the buffer</param>
         private void RecordStorageAlignment(BuffersPerStage buffers, int index, ulong gpuVa)
         {
-            bool unaligned = (gpuVa & (Constants.StorageAlignment - 1)) != 0;
+            bool unaligned = (gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1)) != 0;
 
             if (unaligned || HasUnalignedStorageBuffers)
             {
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
index 7f83f58800..4b828080d9 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         private const ushort FileFormatVersionMajor = 1;
         private const ushort FileFormatVersionMinor = 2;
         private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
-        private const uint CodeGenVersion = 5027;
+        private const uint CodeGenVersion = 4992;
 
         private const string SharedTocFileName = "shared.toc";
         private const string SharedDataFileName = "shared.data";
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs
index 39b31cf6ab..3fc32d7119 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs
@@ -92,7 +92,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
             int imageBinding = stageIndex * imagesPerStage * 2;
 
             AddDescriptor(stages, ResourceType.UniformBuffer, UniformSetIndex, uniformBinding, uniformsPerStage);
-            AddArrayDescriptor(stages, ResourceType.StorageBuffer, StorageSetIndex, storageBinding, storagesPerStage);
+            AddDescriptor(stages, ResourceType.StorageBuffer, StorageSetIndex, storageBinding, storagesPerStage);
             AddDualDescriptor(stages, ResourceType.TextureAndSampler, ResourceType.BufferTexture, TextureSetIndex, textureBinding, texturesPerStage);
             AddDualDescriptor(stages, ResourceType.Image, ResourceType.BufferImage, ImageSetIndex, imageBinding, imagesPerStage);
 
@@ -133,19 +133,6 @@ namespace Ryujinx.Graphics.Gpu.Shader
             AddDescriptor(stages, type2, setIndex, binding + count, count);
         }
 
-        /// <summary>
-        /// Adds an array resource to the list of descriptors.
-        /// </summary>
-        /// <param name="stages">Shader stages where the resource is used</param>
-        /// <param name="type">Type of the resource</param>
-        /// <param name="setIndex">Descriptor set number where the resource will be bound</param>
-        /// <param name="binding">Binding number where the resource will be bound</param>
-        /// <param name="count">Number of resources bound at the binding location</param>
-        private void AddArrayDescriptor(ResourceStages stages, ResourceType type, int setIndex, int binding, int count)
-        {
-            _resourceDescriptors[setIndex].Add(new ResourceDescriptor(binding, count, type, stages));
-        }
-
         /// <summary>
         /// Adds buffer usage information to the list of usages.
         /// </summary>
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
index 1bd0182b5a..958f1cef39 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
@@ -104,14 +104,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
             }
 
             DeclareConstantBuffers(context, context.Config.Properties.ConstantBuffers.Values);
-
-            var sBufferDescriptors = context.Config.GetStorageBufferDescriptors();
-            if (sBufferDescriptors.Length != 0)
-            {
-                DeclareStorages(context, sBufferDescriptors);
-
-                context.AppendLine();
-            }
+            DeclareStorageBuffers(context, context.Config.Properties.StorageBuffers.Values);
 
             var textureDescriptors = context.Config.GetTextureDescriptors();
             if (textureDescriptors.Length != 0)
@@ -250,11 +243,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                 AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl");
             }
 
-            if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Storage) != 0)
-            {
-                AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl");
-            }
-
             if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0)
             {
                 AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl");
@@ -290,11 +278,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                 AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl");
             }
 
-            if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreStorageSmallInt) != 0)
-            {
-                AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl");
-            }
-
             if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0)
             {
                 AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl");
@@ -356,6 +339,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
         }
 
         private static void DeclareConstantBuffers(CodeGenContext context, IEnumerable<BufferDefinition> buffers)
+        {
+            DeclareBuffers(context, buffers, "uniform");
+        }
+
+        private static void DeclareStorageBuffers(CodeGenContext context, IEnumerable<BufferDefinition> buffers)
+        {
+            DeclareBuffers(context, buffers, "buffer");
+        }
+
+        private static void DeclareBuffers(CodeGenContext context, IEnumerable<BufferDefinition> buffers, string declType)
         {
             foreach (BufferDefinition buffer in buffers)
             {
@@ -365,7 +358,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                     _ => "std430"
                 };
 
-                context.AppendLine($"layout (binding = {buffer.Binding}, {layout}) uniform _{buffer.Name}");
+                context.AppendLine($"layout (binding = {buffer.Binding}, {layout}) {declType} _{buffer.Name}");
                 context.EnterScope();
 
                 foreach (StructureField field in buffer.Type.Fields)
@@ -373,9 +366,17 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                     if (field.Type.HasFlag(AggregateType.Array))
                     {
                         string typeName = GetVarTypeName(context, field.Type & ~AggregateType.Array);
-                        string arraySize = field.ArrayLength.ToString(CultureInfo.InvariantCulture);
 
-                        context.AppendLine($"{typeName} {field.Name}[{arraySize}];");
+                        if (field.ArrayLength > 0)
+                        {
+                            string arraySize = field.ArrayLength.ToString(CultureInfo.InvariantCulture);
+
+                            context.AppendLine($"{typeName} {field.Name}[{arraySize}];");
+                        }
+                        else
+                        {
+                            context.AppendLine($"{typeName} {field.Name}[];");
+                        }
                     }
                     else
                     {
@@ -390,22 +391,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
             }
         }
 
-        private static void DeclareStorages(CodeGenContext context, BufferDescriptor[] descriptors)
-        {
-            string sbName = OperandManager.GetShaderStagePrefix(context.Config.Stage);
-
-            sbName += "_" + DefaultNames.StorageNamePrefix;
-
-            string blockName = $"{sbName}_{DefaultNames.BlockSuffix}";
-
-            string layout = context.Config.Options.TargetApi == TargetApi.Vulkan ? ", set = 1" : string.Empty;
-
-            context.AppendLine($"layout (binding = {context.Config.FirstStorageBufferBinding}{layout}, std430) buffer {blockName}");
-            context.EnterScope();
-            context.AppendLine("uint " + DefaultNames.DataName + "[];");
-            context.LeaveScope($" {sbName}[{NumberFormatter.FormatInt(descriptors.Max(x => x.Slot) + 1)}];");
-        }
-
         private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors)
         {
             int arraySize = 0;
@@ -733,7 +718,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
 
             code = code.Replace("\t", CodeGenContext.Tab);
             code = code.Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName);
-            code = code.Replace("$STORAGE_MEM$", OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix);
 
             if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot())
             {
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs
index fc3004a8fb..5ee8259cfa 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs
@@ -11,12 +11,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
         public const string IAttributePrefix = "in_attr";
         public const string OAttributePrefix = "out_attr";
 
-        public const string StorageNamePrefix = "s";
-
-        public const string DataName = "data";
-
-        public const string BlockSuffix = "block";
-
         public const string LocalMemoryName  = "local_mem";
         public const string SharedMemoryName = "shared_mem";
 
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl
deleted file mode 100644
index 0862a71bf6..0000000000
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl
+++ /dev/null
@@ -1,21 +0,0 @@
-int Helper_AtomicMaxS32(int index, int offset, int value)
-{
-    uint oldValue, newValue;
-    do
-    {
-        oldValue = $STORAGE_MEM$[index].data[offset];
-        newValue = uint(max(int(oldValue), value));
-    } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue);
-    return int(oldValue);
-}
-
-int Helper_AtomicMinS32(int index, int offset, int value)
-{
-    uint oldValue, newValue;
-    do
-    {
-        oldValue = $STORAGE_MEM$[index].data[offset];
-        newValue = uint(min(int(oldValue), value));
-    } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue);
-    return int(oldValue);
-}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl
deleted file mode 100644
index f2253a7969..0000000000
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl
+++ /dev/null
@@ -1,23 +0,0 @@
-void Helper_StoreStorage16(int index, int offset, uint value)
-{
-    int wordOffset = offset >> 2;
-    int bitOffset = (offset & 3) * 8;
-    uint oldValue, newValue;
-    do
-    {
-        oldValue = $STORAGE_MEM$[index].data[wordOffset];
-        newValue = bitfieldInsert(oldValue, value, bitOffset, 16);
-    } while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue);
-}
-
-void Helper_StoreStorage8(int index, int offset, uint value)
-{
-    int wordOffset = offset >> 2;
-    int bitOffset = (offset & 3) * 8;
-    uint oldValue, newValue;
-    do
-    {
-        oldValue = $STORAGE_MEM$[index].data[wordOffset];
-        newValue = bitfieldInsert(oldValue, value, bitOffset, 8);
-    } while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue);
-}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
index 24ea66d02d..01d8a6e7a7 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
@@ -68,33 +68,45 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
 
                 string args = string.Empty;
 
-                for (int argIndex = 0; argIndex < arity; argIndex++)
+                if (atomic && operation.StorageKind == StorageKind.StorageBuffer)
                 {
+                    args = GenerateLoadOrStore(context, operation, isStore: false);
+
+                    AggregateType dstType = operation.Inst == Instruction.AtomicMaxS32 || operation.Inst == Instruction.AtomicMinS32
+                        ? AggregateType.S32
+                        : AggregateType.U32;
+
+                    for (int argIndex = operation.SourcesCount - arity + 2; argIndex < operation.SourcesCount; argIndex++)
+                    {
+                        args += ", " + GetSoureExpr(context, operation.GetSource(argIndex), dstType);
+                    }
+                }
+                else if (atomic && operation.StorageKind == StorageKind.SharedMemory)
+                {
+                    args = LoadShared(context, operation);
+
                     // For shared memory access, the second argument is unused and should be ignored.
                     // It is there to make both storage and shared access have the same number of arguments.
                     // For storage, both inputs are consumed when the argument index is 0, so we should skip it here.
-                    if (argIndex == 1 && (atomic || operation.StorageKind == StorageKind.SharedMemory))
-                    {
-                        continue;
-                    }
 
-                    if (argIndex != 0)
+                    for (int argIndex = 2; argIndex < arity; argIndex++)
                     {
                         args += ", ";
-                    }
 
-                    if (argIndex == 0 && atomic)
+                        AggregateType dstType = GetSrcVarType(inst, argIndex);
+
+                        args += GetSoureExpr(context, operation.GetSource(argIndex), dstType);
+                    }
+                }
+                else
+                {
+                    for (int argIndex = 0; argIndex < arity; argIndex++)
                     {
-                        switch (operation.StorageKind)
+                        if (argIndex != 0)
                         {
-                            case StorageKind.SharedMemory: args += LoadShared(context, operation); break;
-                            case StorageKind.StorageBuffer: args += LoadStorage(context, operation); break;
-
-                            default: throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\".");
+                            args += ", ";
                         }
-                    }
-                    else
-                    {
+
                         AggregateType dstType = GetSrcVarType(inst, argIndex);
 
                         args += GetSoureExpr(context, operation.GetSource(argIndex), dstType);
@@ -173,9 +185,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
                     case Instruction.LoadShared:
                         return LoadShared(context, operation);
 
-                    case Instruction.LoadStorage:
-                        return LoadStorage(context, operation);
-
                     case Instruction.Lod:
                         return Lod(context, operation);
 
@@ -203,15 +212,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
                     case Instruction.StoreShared8:
                         return StoreShared8(context, operation);
 
-                    case Instruction.StoreStorage:
-                        return StoreStorage(context, operation);
-
-                    case Instruction.StoreStorage16:
-                        return StoreStorage16(context, operation);
-
-                    case Instruction.StoreStorage8:
-                        return StoreStorage8(context, operation);
-
                     case Instruction.TextureSample:
                         return TextureSample(context, operation);
 
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
index 6cf36a2a6d..f42d98986e 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
@@ -85,7 +85,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             Add(Instruction.Load,                     InstType.Special);
             Add(Instruction.LoadLocal,                InstType.Special);
             Add(Instruction.LoadShared,               InstType.Special);
-            Add(Instruction.LoadStorage,              InstType.Special);
             Add(Instruction.Lod,                      InstType.Special);
             Add(Instruction.LogarithmB2,              InstType.CallUnary,      "log2");
             Add(Instruction.LogicalAnd,               InstType.OpBinaryCom,    "&&",              9);
@@ -123,9 +122,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             Add(Instruction.StoreShared,              InstType.Special);
             Add(Instruction.StoreShared16,            InstType.Special);
             Add(Instruction.StoreShared8,             InstType.Special);
-            Add(Instruction.StoreStorage,             InstType.Special);
-            Add(Instruction.StoreStorage16,           InstType.Special);
-            Add(Instruction.StoreStorage8,            InstType.Special);
             Add(Instruction.Subtract,                 InstType.OpBinary,       "-",               2);
             Add(Instruction.SwizzleAdd,               InstType.CallTernary,    HelperFunctionNames.SwizzleAdd);
             Add(Instruction.TextureSample,            InstType.Special);
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
index dfc8197b64..c8084d9ddc 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
@@ -210,17 +210,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             return $"{arrayName}[{offsetExpr}]";
         }
 
-        public static string LoadStorage(CodeGenContext context, AstOperation operation)
-        {
-            IAstNode src1 = operation.GetSource(0);
-            IAstNode src2 = operation.GetSource(1);
-
-            string indexExpr  = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
-            string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));
-
-            return GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage);
-        }
-
         public static string Lod(CodeGenContext context, AstOperation operation)
         {
             AstTextureOperation texOp = (AstTextureOperation)operation;
@@ -326,60 +315,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             return $"{HelperFunctionNames.StoreShared8}({offsetExpr}, {src})";
         }
 
-        public static string StoreStorage(CodeGenContext context, AstOperation operation)
-        {
-            IAstNode src1 = operation.GetSource(0);
-            IAstNode src2 = operation.GetSource(1);
-            IAstNode src3 = operation.GetSource(2);
-
-            string indexExpr  = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
-            string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));
-
-            AggregateType srcType = OperandManager.GetNodeDestType(context, src3);
-
-            string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32);
-
-            string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage);
-
-            return $"{sb} = {src}";
-        }
-
-        public static string StoreStorage16(CodeGenContext context, AstOperation operation)
-        {
-            IAstNode src1 = operation.GetSource(0);
-            IAstNode src2 = operation.GetSource(1);
-            IAstNode src3 = operation.GetSource(2);
-
-            string indexExpr  = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
-            string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));
-
-            AggregateType srcType = OperandManager.GetNodeDestType(context, src3);
-
-            string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32);
-
-            string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage);
-
-            return $"{HelperFunctionNames.StoreStorage16}({indexExpr}, {offsetExpr}, {src})";
-        }
-
-        public static string StoreStorage8(CodeGenContext context, AstOperation operation)
-        {
-            IAstNode src1 = operation.GetSource(0);
-            IAstNode src2 = operation.GetSource(1);
-            IAstNode src3 = operation.GetSource(2);
-
-            string indexExpr  = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
-            string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));
-
-            AggregateType srcType = OperandManager.GetNodeDestType(context, src3);
-
-            string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32);
-
-            string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage);
-
-            return $"{HelperFunctionNames.StoreStorage8}({indexExpr}, {offsetExpr}, {src})";
-        }
-
         public static string TextureSample(CodeGenContext context, AstOperation operation)
         {
             AstTextureOperation texOp = (AstTextureOperation)operation;
@@ -701,25 +636,34 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             }
         }
 
-        private static string GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore)
+        public static string GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore)
         {
             StorageKind storageKind = operation.StorageKind;
 
             string varName;
             AggregateType varType;
             int srcIndex = 0;
-            int inputsCount = isStore ? operation.SourcesCount - 1 : operation.SourcesCount;
+            bool isStoreOrAtomic = operation.Inst == Instruction.Store || operation.Inst.IsAtomic();
+            int inputsCount = isStoreOrAtomic ? operation.SourcesCount - 1 : operation.SourcesCount;
+
+            if (operation.Inst == Instruction.AtomicCompareAndSwap)
+            {
+                inputsCount--;
+            }
 
             switch (storageKind)
             {
                 case StorageKind.ConstantBuffer:
+                case StorageKind.StorageBuffer:
                     if (!(operation.GetSource(srcIndex++) is AstOperand bindingIndex) || bindingIndex.Type != OperandType.Constant)
                     {
                         throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
                     }
 
                     int binding = bindingIndex.Value;
-                    BufferDefinition buffer = context.Config.Properties.ConstantBuffers[binding];
+                    BufferDefinition buffer = storageKind == StorageKind.ConstantBuffer
+                        ? context.Config.Properties.ConstantBuffers[binding]
+                        : context.Config.Properties.StorageBuffers[binding];
 
                     if (!(operation.GetSource(srcIndex++) is AstOperand fieldIndex) || fieldIndex.Type != OperandType.Constant)
                     {
@@ -825,15 +769,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             return varName;
         }
 
-        private static string GetStorageBufferAccessor(string slotExpr, string offsetExpr, ShaderStage stage)
-        {
-            string sbName = OperandManager.GetShaderStagePrefix(stage);
-
-            sbName += "_" + DefaultNames.StorageNamePrefix;
-
-            return $"{sbName}[{slotExpr}].{DefaultNames.DataName}[{offsetExpr}]";
-        }
-
         private static string GetMask(int index)
         {
             return $".{"rgba".AsSpan(index, 1)}";
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs
index e34e4e076e..4fd1d17c47 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs
@@ -118,6 +118,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                     switch (operation.StorageKind)
                     {
                         case StorageKind.ConstantBuffer:
+                        case StorageKind.StorageBuffer:
                             if (!(operation.GetSource(0) is AstOperand bindingIndex) || bindingIndex.Type != OperandType.Constant)
                             {
                                 throw new InvalidOperationException($"First input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand.");
@@ -128,7 +129,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                                 throw new InvalidOperationException($"Second input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand.");
                             }
 
-                            BufferDefinition buffer = context.Config.Properties.ConstantBuffers[bindingIndex.Value];
+                            BufferDefinition buffer = operation.StorageKind == StorageKind.ConstantBuffer
+                                ? context.Config.Properties.ConstantBuffers[bindingIndex.Value]
+                                : context.Config.Properties.StorageBuffers[bindingIndex.Value];
                             StructureField field = buffer.Type.Fields[fieldIndex.Value];
 
                             return field.Type & AggregateType.ElementTypeMask;
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs
index 7af6d316ed..c1bfa08836 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs
@@ -24,7 +24,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
         public int InputVertices { get; }
 
         public Dictionary<int, Instruction> ConstantBuffers { get; } = new Dictionary<int, Instruction>();
-        public Instruction StorageBuffersArray { get; set; }
+        public Dictionary<int, Instruction> StorageBuffers { get; } = new Dictionary<int, Instruction>();
         public Instruction LocalMemory { get; set; }
         public Instruction SharedMemory { get; set; }
         public Dictionary<TextureMeta, SamplerType> SamplersTypes { get; } = new Dictionary<TextureMeta, SamplerType>();
@@ -308,7 +308,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
         {
             if ((type & AggregateType.Array) != 0)
             {
-                return TypeArray(GetType(type & ~AggregateType.Array), Constant(TypeU32(), length));
+                if (length > 0)
+                {
+                    return TypeArray(GetType(type & ~AggregateType.Array), Constant(TypeU32(), length));
+                }
+                else
+                {
+                    return TypeRuntimeArray(GetType(type & ~AggregateType.Array));
+                }
             }
             else if ((type & AggregateType.ElementCountMask) != 0)
             {
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs
index 7c242589ed..eb2db514d3 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs
@@ -5,6 +5,7 @@ using Ryujinx.Graphics.Shader.Translation;
 using Spv.Generator;
 using System;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Linq;
 using System.Numerics;
 using static Spv.Specification;
@@ -99,7 +100,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
             }
 
             DeclareConstantBuffers(context, context.Config.Properties.ConstantBuffers.Values);
-            DeclareStorageBuffers(context, context.Config.GetStorageBufferDescriptors());
+            DeclareStorageBuffers(context, context.Config.Properties.StorageBuffers.Values);
             DeclareSamplers(context, context.Config.GetTextureDescriptors());
             DeclareImages(context, context.Config.GetImageDescriptors());
             DeclareInputsAndOutputs(context, info);
@@ -127,6 +128,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
         }
 
         private static void DeclareConstantBuffers(CodeGenContext context, IEnumerable<BufferDefinition> buffers)
+        {
+            DeclareBuffers(context, buffers, isBuffer: false);
+        }
+
+        private static void DeclareStorageBuffers(CodeGenContext context, IEnumerable<BufferDefinition> buffers)
+        {
+            DeclareBuffers(context, buffers, isBuffer: true);
+        }
+
+        private static void DeclareBuffers(CodeGenContext context, IEnumerable<BufferDefinition> buffers, bool isBuffer)
         {
             HashSet<SpvInstruction> decoratedTypes = new HashSet<SpvInstruction>();
 
@@ -155,6 +166,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
                             context.Decorate(structFieldTypes[fieldIndex], Decoration.ArrayStride, (LiteralInteger)fieldSize);
                         }
 
+                        // Zero lengths are assumed to be a "runtime array" (which does not have an explicit length
+                        // specified on the shader, and instead assumes the bound buffer length).
+                        // It is only valid as the last struct element.
+
+                        Debug.Assert(field.ArrayLength > 0 || fieldIndex == buffer.Type.Fields.Length - 1);
+
                         offset += fieldSize * field.ArrayLength;
                     }
                     else
@@ -163,56 +180,37 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
                     }
                 }
 
-                var ubStructType = context.TypeStruct(false, structFieldTypes);
+                var structType = context.TypeStruct(false, structFieldTypes);
 
-                if (decoratedTypes.Add(ubStructType))
+                if (decoratedTypes.Add(structType))
                 {
-                    context.Decorate(ubStructType, Decoration.Block);
+                    context.Decorate(structType, isBuffer ? Decoration.BufferBlock : Decoration.Block);
 
                     for (int fieldIndex = 0; fieldIndex < structFieldOffsets.Length; fieldIndex++)
                     {
-                        context.MemberDecorate(ubStructType, fieldIndex, Decoration.Offset, (LiteralInteger)structFieldOffsets[fieldIndex]);
+                        context.MemberDecorate(structType, fieldIndex, Decoration.Offset, (LiteralInteger)structFieldOffsets[fieldIndex]);
                     }
                 }
 
-                var ubPointerType = context.TypePointer(StorageClass.Uniform, ubStructType);
-                var ubVariable = context.Variable(ubPointerType, StorageClass.Uniform);
+                var pointerType = context.TypePointer(StorageClass.Uniform, structType);
+                var variable = context.Variable(pointerType, StorageClass.Uniform);
 
-                context.Name(ubVariable, buffer.Name);
-                context.Decorate(ubVariable, Decoration.DescriptorSet, (LiteralInteger)buffer.Set);
-                context.Decorate(ubVariable, Decoration.Binding, (LiteralInteger)buffer.Binding);
-                context.AddGlobalVariable(ubVariable);
-                context.ConstantBuffers.Add(buffer.Binding, ubVariable);
+                context.Name(variable, buffer.Name);
+                context.Decorate(variable, Decoration.DescriptorSet, (LiteralInteger)buffer.Set);
+                context.Decorate(variable, Decoration.Binding, (LiteralInteger)buffer.Binding);
+                context.AddGlobalVariable(variable);
+
+                if (isBuffer)
+                {
+                    context.StorageBuffers.Add(buffer.Binding, variable);
+                }
+                else
+                {
+                    context.ConstantBuffers.Add(buffer.Binding, variable);
+                }
             }
         }
 
-        private static void DeclareStorageBuffers(CodeGenContext context, BufferDescriptor[] descriptors)
-        {
-            if (descriptors.Length == 0)
-            {
-                return;
-            }
-
-            int setIndex = context.Config.Options.TargetApi == TargetApi.Vulkan ? 1 : 0;
-            int count = descriptors.Max(x => x.Slot) + 1;
-
-            var sbArrayType = context.TypeRuntimeArray(context.TypeU32());
-            context.Decorate(sbArrayType, Decoration.ArrayStride, (LiteralInteger)4);
-            var sbStructType = context.TypeStruct(true, sbArrayType);
-            context.Decorate(sbStructType, Decoration.BufferBlock);
-            context.MemberDecorate(sbStructType, 0, Decoration.Offset, (LiteralInteger)0);
-            var sbStructArrayType = context.TypeArray(sbStructType, context.Constant(context.TypeU32(), count));
-            var sbPointerType = context.TypePointer(StorageClass.Uniform, sbStructArrayType);
-            var sbVariable = context.Variable(sbPointerType, StorageClass.Uniform);
-
-            context.Name(sbVariable, $"{GetStagePrefix(context.Config.Stage)}_s");
-            context.Decorate(sbVariable, Decoration.DescriptorSet, (LiteralInteger)setIndex);
-            context.Decorate(sbVariable, Decoration.Binding, (LiteralInteger)context.Config.FirstStorageBufferBinding);
-            context.AddGlobalVariable(sbVariable);
-
-            context.StorageBuffersArray = sbVariable;
-        }
-
         private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors)
         {
             foreach (var descriptor in descriptors)
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
index f088a47f35..4be0c62be8 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
@@ -99,7 +99,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
             Add(Instruction.Load,                     GenerateLoad);
             Add(Instruction.LoadLocal,                GenerateLoadLocal);
             Add(Instruction.LoadShared,               GenerateLoadShared);
-            Add(Instruction.LoadStorage,              GenerateLoadStorage);
             Add(Instruction.Lod,                      GenerateLod);
             Add(Instruction.LogarithmB2,              GenerateLogarithmB2);
             Add(Instruction.LogicalAnd,               GenerateLogicalAnd);
@@ -137,9 +136,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
             Add(Instruction.StoreShared,              GenerateStoreShared);
             Add(Instruction.StoreShared16,            GenerateStoreShared16);
             Add(Instruction.StoreShared8,             GenerateStoreShared8);
-            Add(Instruction.StoreStorage,             GenerateStoreStorage);
-            Add(Instruction.StoreStorage16,           GenerateStoreStorage16);
-            Add(Instruction.StoreStorage8,            GenerateStoreStorage8);
             Add(Instruction.Subtract,                 GenerateSubtract);
             Add(Instruction.SwizzleAdd,               GenerateSwizzleAdd);
             Add(Instruction.TextureSample,            GenerateTextureSample);
@@ -889,14 +885,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
             return new OperationResult(AggregateType.U32, value);
         }
 
-        private static OperationResult GenerateLoadStorage(CodeGenContext context, AstOperation operation)
-        {
-            var elemPointer = GetStorageElemPointer(context, operation);
-            var value = context.Load(context.TypeU32(), elemPointer);
-
-            return new OperationResult(AggregateType.U32, value);
-        }
-
         private static OperationResult GenerateLod(CodeGenContext context, AstOperation operation)
         {
             AstTextureOperation texOp = (AstTextureOperation)operation;
@@ -1307,28 +1295,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
             return OperationResult.Invalid;
         }
 
-        private static OperationResult GenerateStoreStorage(CodeGenContext context, AstOperation operation)
-        {
-            var elemPointer = GetStorageElemPointer(context, operation);
-            context.Store(elemPointer, context.Get(AggregateType.U32, operation.GetSource(2)));
-
-            return OperationResult.Invalid;
-        }
-
-        private static OperationResult GenerateStoreStorage16(CodeGenContext context, AstOperation operation)
-        {
-            GenerateStoreStorageSmallInt(context, operation, 16);
-
-            return OperationResult.Invalid;
-        }
-
-        private static OperationResult GenerateStoreStorage8(CodeGenContext context, AstOperation operation)
-        {
-            GenerateStoreStorageSmallInt(context, operation, 8);
-
-            return OperationResult.Invalid;
-        }
-
         private static OperationResult GenerateSubtract(CodeGenContext context, AstOperation operation)
         {
             return GenerateBinary(context, operation, context.Delegates.FSub, context.Delegates.ISub);
@@ -1849,13 +1815,13 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
             AstOperation operation,
             Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU)
         {
-            var value = context.GetU32(operation.GetSource(2));
+            var value = context.GetU32(operation.GetSource(operation.SourcesCount - 1));
 
             SpvInstruction elemPointer;
 
             if (operation.StorageKind == StorageKind.StorageBuffer)
             {
-                elemPointer = GetStorageElemPointer(context, operation);
+                elemPointer = GetStoragePointer(context, operation, out _);
             }
             else if (operation.StorageKind == StorageKind.SharedMemory)
             {
@@ -1875,14 +1841,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
 
         private static OperationResult GenerateAtomicMemoryCas(CodeGenContext context, AstOperation operation)
         {
-            var value0 = context.GetU32(operation.GetSource(2));
-            var value1 = context.GetU32(operation.GetSource(3));
+            var value0 = context.GetU32(operation.GetSource(operation.SourcesCount - 2));
+            var value1 = context.GetU32(operation.GetSource(operation.SourcesCount - 1));
 
             SpvInstruction elemPointer;
 
             if (operation.StorageKind == StorageKind.StorageBuffer)
             {
-                elemPointer = GetStorageElemPointer(context, operation);
+                elemPointer = GetStoragePointer(context, operation, out _);
             }
             else if (operation.StorageKind == StorageKind.SharedMemory)
             {
@@ -1901,17 +1867,33 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
         }
 
         private static OperationResult GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore)
+        {
+            SpvInstruction pointer = GetStoragePointer(context, operation, out AggregateType varType);
+
+            if (isStore)
+            {
+                context.Store(pointer, context.Get(varType, operation.GetSource(operation.SourcesCount - 1)));
+                return OperationResult.Invalid;
+            }
+            else
+            {
+                var result = context.Load(context.GetType(varType), pointer);
+                return new OperationResult(varType, result);
+            }
+        }
+
+        private static SpvInstruction GetStoragePointer(CodeGenContext context, AstOperation operation, out AggregateType varType)
         {
             StorageKind storageKind = operation.StorageKind;
 
             StorageClass storageClass;
             SpvInstruction baseObj;
-            AggregateType varType;
             int srcIndex = 0;
 
             switch (storageKind)
             {
                 case StorageKind.ConstantBuffer:
+                case StorageKind.StorageBuffer:
                     if (!(operation.GetSource(srcIndex++) is AstOperand bindingIndex) || bindingIndex.Type != OperandType.Constant)
                     {
                         throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
@@ -1922,12 +1904,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
                         throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand.");
                     }
 
-                    BufferDefinition buffer = context.Config.Properties.ConstantBuffers[bindingIndex.Value];
+                    BufferDefinition buffer = storageKind == StorageKind.ConstantBuffer
+                        ? context.Config.Properties.ConstantBuffers[bindingIndex.Value]
+                        : context.Config.Properties.StorageBuffers[bindingIndex.Value];
                     StructureField field = buffer.Type.Fields[fieldIndex.Value];
 
                     storageClass = StorageClass.Uniform;
                     varType = field.Type & AggregateType.ElementTypeMask;
-                    baseObj = context.ConstantBuffers[bindingIndex.Value];
+                    baseObj = storageKind == StorageKind.ConstantBuffer
+                        ? context.ConstantBuffers[bindingIndex.Value]
+                        : context.StorageBuffers[bindingIndex.Value];
                     break;
 
                 case StorageKind.Input:
@@ -1993,7 +1979,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
                     throw new InvalidOperationException($"Invalid storage kind {storageKind}.");
             }
 
-            int inputsCount = (isStore ? operation.SourcesCount - 1 : operation.SourcesCount) - srcIndex;
+            bool isStoreOrAtomic = operation.Inst == Instruction.Store || operation.Inst.IsAtomic();
+            int inputsCount = (isStoreOrAtomic ? operation.SourcesCount - 1 : operation.SourcesCount) - srcIndex;
+
+            if (operation.Inst == Instruction.AtomicCompareAndSwap)
+            {
+                inputsCount--;
+            }
+
             SpvInstruction e0, e1, e2;
             SpvInstruction pointer;
 
@@ -2030,16 +2023,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
                     break;
             }
 
-            if (isStore)
-            {
-                context.Store(pointer, context.Get(varType, operation.GetSource(srcIndex)));
-                return OperationResult.Invalid;
-            }
-            else
-            {
-                var result = context.Load(context.GetType(varType), pointer);
-                return new OperationResult(varType, result);
-            }
+            return pointer;
         }
 
         private static SpvInstruction GetScalarInput(CodeGenContext context, IoVariable ioVariable)
@@ -2068,25 +2052,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
             GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize);
         }
 
-        private static void GenerateStoreStorageSmallInt(CodeGenContext context, AstOperation operation, int bitSize)
-        {
-            var i0 = context.Get(AggregateType.S32, operation.GetSource(0));
-            var offset = context.Get(AggregateType.U32, operation.GetSource(1));
-            var value = context.Get(AggregateType.U32, operation.GetSource(2));
-
-            var wordOffset = context.ShiftRightLogical(context.TypeU32(), offset, context.Constant(context.TypeU32(), 2));
-            var bitOffset = context.BitwiseAnd(context.TypeU32(), offset, context.Constant(context.TypeU32(), 3));
-            bitOffset = context.ShiftLeftLogical(context.TypeU32(), bitOffset, context.Constant(context.TypeU32(), 3));
-
-            var sbVariable = context.StorageBuffersArray;
-
-            var i1 = context.Constant(context.TypeS32(), 0);
-
-            var elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeU32()), sbVariable, i0, i1, wordOffset);
-
-            GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize);
-        }
-
         private static void GenerateStoreSmallInt(
             CodeGenContext context,
             SpvInstruction elemPointer,
@@ -2173,16 +2138,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
             }
         }
 
-        private static SpvInstruction GetStorageElemPointer(CodeGenContext context, AstOperation operation)
-        {
-            var sbVariable = context.StorageBuffersArray;
-            var i0 = context.Get(AggregateType.S32, operation.GetSource(0));
-            var i1 = context.Constant(context.TypeS32(), 0);
-            var i2 = context.Get(AggregateType.S32, operation.GetSource(1));
-
-            return context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeU32()), sbVariable, i0, i1, i2);
-        }
-
         private static OperationResult GenerateUnary(
             CodeGenContext context,
             AstOperation operation,
diff --git a/src/Ryujinx.Graphics.Shader/Constants.cs b/src/Ryujinx.Graphics.Shader/Constants.cs
index c6f9ef494c..7f1445ed04 100644
--- a/src/Ryujinx.Graphics.Shader/Constants.cs
+++ b/src/Ryujinx.Graphics.Shader/Constants.cs
@@ -10,7 +10,5 @@ namespace Ryujinx.Graphics.Shader
         public const int NvnBaseVertexByteOffset = 0x640;
         public const int NvnBaseInstanceByteOffset = 0x644;
         public const int NvnDrawIndexByteOffset = 0x648;
-
-        public const int StorageAlignment = 16;
     }
 }
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
index 3be5088e45..473964def2 100644
--- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
+++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -79,7 +79,7 @@ namespace Ryujinx.Graphics.Shader
         /// <returns>Binding number</returns>
         int QueryBindingConstantBuffer(int index)
         {
-            return index;
+            return index + 1;
         }
 
         /// <summary>
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs
index 91c2323037..7369635527 100644
--- a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs
@@ -164,6 +164,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
 
             if (op.Ccc == Ccc.T)
             {
+                context.PrepareForReturn();
                 context.Return();
             }
             else
@@ -175,6 +176,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
                 {
                     Operand lblSkip = Label();
                     context.BranchIfFalse(lblSkip, cond);
+                    context.PrepareForReturn();
                     context.Return();
                     context.MarkLabel(lblSkip);
                 }
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
index 6f5913eb31..9aa7382005 100644
--- a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
@@ -336,13 +336,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
             int offset,
             bool extended)
         {
-            bool isSmallInt = size < LsSize.B32;
-
             int count = GetVectorCount(size);
+            StorageKind storageKind = GetStorageKind(size);
 
-            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
+            (_, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
 
-            Operand bitOffset = GetBitOffset(context, addrLow);
+            Operand srcA = context.Copy(new Operand(new Register(ra, RegisterType.Gpr)));
 
             for (int index = 0; index < count; index++)
             {
@@ -353,12 +352,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
                     break;
                 }
 
-                Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);
-
-                if (isSmallInt)
-                {
-                    value = ExtractSmallInt(context, size, bitOffset, value);
-                }
+                Operand value = context.Load(storageKind, context.IAdd(srcA, Const(offset + index * 4)), addrHigh);
 
                 context.Copy(Register(dest), value);
             }
@@ -445,10 +439,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
             }
 
             int count = GetVectorCount((LsSize)size);
+            StorageKind storageKind = GetStorageKind((LsSize)size);
 
-            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
+            (_, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
 
-            Operand bitOffset = GetBitOffset(context, addrLow);
+            Operand srcA = context.Copy(new Operand(new Register(ra, RegisterType.Gpr)));
 
             for (int index = 0; index < count; index++)
             {
@@ -456,23 +451,24 @@ namespace Ryujinx.Graphics.Shader.Instructions
 
                 Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
 
-                Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));
+                Operand addrLowOffset = context.IAdd(srcA, Const(offset + index * 4));
 
-                if (size == LsSize2.U8 || size == LsSize2.S8)
-                {
-                    context.StoreGlobal8(addrLowOffset, addrHigh, value);
-                }
-                else if (size == LsSize2.U16 || size == LsSize2.S16)
-                {
-                    context.StoreGlobal16(addrLowOffset, addrHigh, value);
-                }
-                else
-                {
-                    context.StoreGlobal(addrLowOffset, addrHigh, value);
-                }
+                context.Store(storageKind, addrLowOffset, addrHigh, value);
             }
         }
 
+        private static StorageKind GetStorageKind(LsSize size)
+        {
+            return size switch
+            {
+                LsSize.U8 => StorageKind.GlobalMemoryU8,
+                LsSize.S8 => StorageKind.GlobalMemoryS8,
+                LsSize.U16 => StorageKind.GlobalMemoryU16,
+                LsSize.S16 => StorageKind.GlobalMemoryS16,
+                _ => StorageKind.GlobalMemory
+            };
+        }
+
         private static int GetVectorCount(LsSize size)
         {
             switch (size)
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
index f7afe50712..aecb672495 100644
--- a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
@@ -79,10 +79,8 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
         ImageAtomic,
         IsNan,
         Load,
-        LoadGlobal,
         LoadLocal,
         LoadShared,
-        LoadStorage,
         Lod,
         LogarithmB2,
         LogicalAnd,
@@ -117,16 +115,10 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
         Sine,
         SquareRoot,
         Store,
-        StoreGlobal,
-        StoreGlobal16,
-        StoreGlobal8,
         StoreLocal,
         StoreShared,
         StoreShared16,
         StoreShared8,
-        StoreStorage,
-        StoreStorage16,
-        StoreStorage8,
         Subtract,
         SwizzleAdd,
         TextureSample,
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs
index 5935744384..2b5dd1dec9 100644
--- a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs
@@ -11,7 +11,12 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
         StorageBuffer,
         LocalMemory,
         SharedMemory,
-        GlobalMemory
+        GlobalMemory,
+        // TODO: Remove these and store the type as a field on the Operation class itself.
+        GlobalMemoryS8,
+        GlobalMemoryS16,
+        GlobalMemoryU8,
+        GlobalMemoryU16
     }
 
     static class StorageKindExtensions
diff --git a/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj
index 2efcbca4fb..86de2e755e 100644
--- a/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj
+++ b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj
@@ -11,7 +11,6 @@
 
   <ItemGroup>
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\AtomicMinMaxS32Shared.glsl" />
-    <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\AtomicMinMaxS32Storage.glsl" />
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighS32.glsl" />
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighU32.glsl" />
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\Shuffle.glsl" />
@@ -19,7 +18,6 @@
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleUp.glsl" />
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleXor.glsl" />
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\StoreSharedSmallInt.glsl" />
-    <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\StoreStorageSmallInt.glsl" />
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\SwizzleAdd.glsl" />
   </ItemGroup>
 
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
index d45f8d4eea..c348b5d93d 100644
--- a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
@@ -6,7 +6,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
     enum HelperFunctionsMask
     {
         AtomicMinMaxS32Shared  = 1 << 0,
-        AtomicMinMaxS32Storage = 1 << 1,
         MultiplyHighS32        = 1 << 2,
         MultiplyHighU32        = 1 << 3,
         Shuffle                = 1 << 4,
@@ -14,7 +13,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
         ShuffleUp              = 1 << 6,
         ShuffleXor             = 1 << 7,
         StoreSharedSmallInt    = 1 << 8,
-        StoreStorageSmallInt   = 1 << 9,
         SwizzleAdd             = 1 << 10,
         FSI                    = 1 << 11
     }
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
index 44f0fad952..6e2013501e 100644
--- a/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
@@ -90,10 +90,8 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
             Add(Instruction.ImageAtomic,              AggregateType.S32);
             Add(Instruction.IsNan,                    AggregateType.Bool,   AggregateType.Scalar);
             Add(Instruction.Load,                     AggregateType.FP32);
-            Add(Instruction.LoadGlobal,               AggregateType.U32,    AggregateType.S32,     AggregateType.S32);
             Add(Instruction.LoadLocal,                AggregateType.U32,    AggregateType.S32);
             Add(Instruction.LoadShared,               AggregateType.U32,    AggregateType.S32);
-            Add(Instruction.LoadStorage,              AggregateType.U32,    AggregateType.S32,     AggregateType.S32);
             Add(Instruction.Lod,                      AggregateType.FP32);
             Add(Instruction.LogarithmB2,              AggregateType.Scalar, AggregateType.Scalar);
             Add(Instruction.LogicalAnd,               AggregateType.Bool,   AggregateType.Bool,    AggregateType.Bool);
@@ -123,14 +121,10 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
             Add(Instruction.Sine,                     AggregateType.Scalar, AggregateType.Scalar);
             Add(Instruction.SquareRoot,               AggregateType.Scalar, AggregateType.Scalar);
             Add(Instruction.Store,                    AggregateType.Void);
-            Add(Instruction.StoreGlobal,              AggregateType.Void,   AggregateType.S32,     AggregateType.S32,     AggregateType.U32);
             Add(Instruction.StoreLocal,               AggregateType.Void,   AggregateType.S32,     AggregateType.U32);
             Add(Instruction.StoreShared,              AggregateType.Void,   AggregateType.S32,     AggregateType.U32);
             Add(Instruction.StoreShared16,            AggregateType.Void,   AggregateType.S32,     AggregateType.U32);
             Add(Instruction.StoreShared8,             AggregateType.Void,   AggregateType.S32,     AggregateType.U32);
-            Add(Instruction.StoreStorage,             AggregateType.Void,   AggregateType.S32,     AggregateType.S32,     AggregateType.U32);
-            Add(Instruction.StoreStorage16,           AggregateType.Void,   AggregateType.S32,     AggregateType.S32,     AggregateType.U32);
-            Add(Instruction.StoreStorage8,            AggregateType.Void,   AggregateType.S32,     AggregateType.S32,     AggregateType.U32);
             Add(Instruction.Subtract,                 AggregateType.Scalar, AggregateType.Scalar,  AggregateType.Scalar);
             Add(Instruction.SwizzleAdd,               AggregateType.FP32,   AggregateType.FP32,    AggregateType.FP32,    AggregateType.S32);
             Add(Instruction.TextureSample,            AggregateType.FP32);
@@ -166,7 +160,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
             {
                 return AggregateType.FP32;
             }
-            else if (inst == Instruction.Call || inst == Instruction.Load || inst == Instruction.Store)
+            else if (inst == Instruction.Call || inst == Instruction.Load || inst == Instruction.Store || inst.IsAtomic())
             {
                 return AggregateType.S32;
             }
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs
index 061c89edd0..157c5937dd 100644
--- a/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs
@@ -5,17 +5,25 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
     class ShaderProperties
     {
         private readonly Dictionary<int, BufferDefinition> _constantBuffers;
+        private readonly Dictionary<int, BufferDefinition> _storageBuffers;
 
         public IReadOnlyDictionary<int, BufferDefinition> ConstantBuffers => _constantBuffers;
+        public IReadOnlyDictionary<int, BufferDefinition> StorageBuffers => _storageBuffers;
 
         public ShaderProperties()
         {
             _constantBuffers = new Dictionary<int, BufferDefinition>();
+            _storageBuffers = new Dictionary<int, BufferDefinition>();
         }
 
         public void AddConstantBuffer(int binding, BufferDefinition definition)
         {
             _constantBuffers[binding] = definition;
         }
+
+        // Stores the definition under the given binding in the storage buffer table,
+        // replacing any definition previously stored for that binding.
+        public void AddStorageBuffer(int binding, BufferDefinition definition)
+            => _storageBuffers[binding] = definition;
     }
 }
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
index 4405c07aae..a8f1327665 100644
--- a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
@@ -280,10 +280,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
                     {
                         context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Shared;
                     }
-                    else if (operation.StorageKind == StorageKind.StorageBuffer)
-                    {
-                        context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Storage;
-                    }
                     break;
                 case Instruction.MultiplyHighS32:
                     context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32;
@@ -307,10 +303,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
                 case Instruction.StoreShared8:
                     context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreSharedSmallInt;
                     break;
-                case Instruction.StoreStorage16:
-                case Instruction.StoreStorage8:
-                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreStorageSmallInt;
-                    break;
                 case Instruction.SwizzleAdd:
                     context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd;
                     break;
diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
index 6d4104ceeb..be0cba8090 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
@@ -57,6 +57,56 @@ namespace Ryujinx.Graphics.Shader.Translation
             return context.Add(Instruction.AtomicXor, storageKind, Local(), a, b, c);
         }
 
+        // Emits AtomicAdd for the given storage kind; sources are Const(binding), e0, e1 and value.
+        // A new Local() operand is passed to context.Add, whose result is returned.
+        public static Operand AtomicAdd(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
+            => context.Add(Instruction.AtomicAdd, storageKind, Local(), Const(binding), e0, e1, value);
+
+        // Emits AtomicAnd for the given storage kind; sources are Const(binding), e0, e1 and value.
+        // A new Local() operand is passed to context.Add, whose result is returned.
+        public static Operand AtomicAnd(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
+            => context.Add(Instruction.AtomicAnd, storageKind, Local(), Const(binding), e0, e1, value);
+
+        // Emits AtomicCompareAndSwap; sources are Const(binding), e0, e1, compare and value, in that order.
+        // A new Local() operand is passed to context.Add, whose result is returned.
+        public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand compare, Operand value)
+            => context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), Const(binding), e0, e1, compare, value);
+
+        // Emits AtomicMaxS32 for the given storage kind; sources are Const(binding), e0, e1 and value.
+        // A new Local() operand is passed to context.Add, whose result is returned.
+        public static Operand AtomicMaxS32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
+            => context.Add(Instruction.AtomicMaxS32, storageKind, Local(), Const(binding), e0, e1, value);
+
+        // Emits AtomicMaxU32 for the given storage kind; sources are Const(binding), e0, e1 and value.
+        // A new Local() operand is passed to context.Add, whose result is returned.
+        public static Operand AtomicMaxU32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
+            => context.Add(Instruction.AtomicMaxU32, storageKind, Local(), Const(binding), e0, e1, value);
+
+        // Emits AtomicMinS32 for the given storage kind; sources are Const(binding), e0, e1 and value.
+        // A new Local() operand is passed to context.Add, whose result is returned.
+        public static Operand AtomicMinS32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
+            => context.Add(Instruction.AtomicMinS32, storageKind, Local(), Const(binding), e0, e1, value);
+
+        // Emits AtomicMinU32 for the given storage kind; sources are Const(binding), e0, e1 and value.
+        // A new Local() operand is passed to context.Add, whose result is returned.
+        public static Operand AtomicMinU32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
+            => context.Add(Instruction.AtomicMinU32, storageKind, Local(), Const(binding), e0, e1, value);
+
+        // Emits AtomicOr for the given storage kind; sources are Const(binding), e0, e1 and value.
+        // A new Local() operand is passed to context.Add, whose result is returned.
+        public static Operand AtomicOr(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
+            => context.Add(Instruction.AtomicOr, storageKind, Local(), Const(binding), e0, e1, value);
+
+        // Emits AtomicSwap for the given storage kind; sources are Const(binding), e0, e1 and value.
+        // A new Local() operand is passed to context.Add, whose result is returned.
+        public static Operand AtomicSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
+            => context.Add(Instruction.AtomicSwap, storageKind, Local(), Const(binding), e0, e1, value);
+
+        // Emits AtomicXor for the given storage kind; sources are Const(binding), e0, e1 and value.
+        // A new Local() operand is passed to context.Add, whose result is returned.
+        public static Operand AtomicXor(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
+            => context.Add(Instruction.AtomicXor, storageKind, Local(), Const(binding), e0, e1, value);
+
         public static Operand Ballot(this EmitterContext context, Operand a)
         {
             return context.Add(Instruction.Ballot, Local(), a);
@@ -554,6 +604,11 @@ namespace Ryujinx.Graphics.Shader.Translation
             return context.Add(fpType | Instruction.IsNan, Local(), a);
         }
 
+        // Emits a Load for the given storage kind using e0 and e1 as its only sources
+        // (no binding constant); a new Local() operand is passed to context.Add.
+        public static Operand Load(this EmitterContext context, StorageKind storageKind, Operand e0, Operand e1)
+            => context.Add(Instruction.Load, storageKind, Local(), e0, e1);
+
         public static Operand Load(this EmitterContext context, StorageKind storageKind, int binding)
         {
             return context.Add(Instruction.Load, storageKind, Local(), Const(binding));
@@ -606,11 +661,6 @@ namespace Ryujinx.Graphics.Shader.Translation
                 : context.Load(storageKind, (int)ioVariable, arrayIndex, elemIndex);
         }
 
-        public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b)
-        {
-            return context.Add(Instruction.LoadGlobal, Local(), a, b);
-        }
-
         public static Operand LoadLocal(this EmitterContext context, Operand a)
         {
             return context.Add(Instruction.LoadLocal, Local(), a);
@@ -655,7 +705,6 @@ namespace Ryujinx.Graphics.Shader.Translation
 
         public static void Return(this EmitterContext context)
         {
-            context.PrepareForReturn();
             context.Add(Instruction.Return);
         }
 
@@ -699,6 +748,16 @@ namespace Ryujinx.Graphics.Shader.Translation
             return context.Add(Instruction.ShuffleXor, (Local(), Local()), a, b, c);
         }
 
+        // Emits a Store for the given storage kind with sources e0, e1 and value
+        // (no binding constant); null is passed where the loads pass a Local() operand.
+        public static Operand Store(this EmitterContext context, StorageKind storageKind, Operand e0, Operand e1, Operand value)
+            => context.Add(Instruction.Store, storageKind, null, e0, e1, value);
+
+        // Emits a Store for the given storage kind with sources Const(binding), e0, e1
+        // and value; null is passed where the loads pass a Local() operand.
+        public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
+            => context.Add(Instruction.Store, storageKind, null, Const(binding), e0, e1, value);
+
         public static Operand Store(
             this EmitterContext context,
             StorageKind storageKind,
@@ -738,21 +797,6 @@ namespace Ryujinx.Graphics.Shader.Translation
                 : context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value);
         }
 
-        public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c)
-        {
-            return context.Add(Instruction.StoreGlobal, null, a, b, c);
-        }
-
-        public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c)
-        {
-            return context.Add(Instruction.StoreGlobal16, null, a, b, c);
-        }
-
-        public static Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c)
-        {
-            return context.Add(Instruction.StoreGlobal8, null, a, b, c);
-        }
-
         public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)
         {
             return context.Add(Instruction.StoreLocal, null, a, b);
diff --git a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
deleted file mode 100644
index a81d0fc4bb..0000000000
--- a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
+++ /dev/null
@@ -1,54 +0,0 @@
-using Ryujinx.Graphics.Shader.IntermediateRepresentation;
-
-namespace Ryujinx.Graphics.Shader.Translation
-{
-    static class GlobalMemory
-    {
-        private const int StorageDescsBaseOffset = 0x44; // In words.
-
-        public const int StorageDescSize = 4; // In words.
-        public const int StorageMaxCount = 16;
-
-        public const int StorageDescsSize = StorageDescSize * StorageMaxCount;
-
-        public const int UbeBaseOffset = 0x98; // In words.
-        public const int UbeMaxCount   = 9;
-        public const int UbeDescsSize  = StorageDescSize * UbeMaxCount;
-        public const int UbeFirstCbuf  = 8;
-
-        public const int DriverReservedCb = 0;
-
-        public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind)
-        {
-            return (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory) ||
-                    inst == Instruction.LoadGlobal ||
-                    inst == Instruction.StoreGlobal ||
-                    inst == Instruction.StoreGlobal16 ||
-                    inst == Instruction.StoreGlobal8;
-        }
-
-        public static int GetStorageCbOffset(ShaderStage stage, int slot)
-        {
-            return GetStorageBaseCbOffset(stage) + slot * StorageDescSize;
-        }
-
-        public static int GetStorageBaseCbOffset(ShaderStage stage)
-        {
-            return stage switch
-            {
-                ShaderStage.Compute                => StorageDescsBaseOffset + 2 * StorageDescsSize,
-                ShaderStage.Vertex                 => StorageDescsBaseOffset,
-                ShaderStage.TessellationControl    => StorageDescsBaseOffset + 1 * StorageDescsSize,
-                ShaderStage.TessellationEvaluation => StorageDescsBaseOffset + 2 * StorageDescsSize,
-                ShaderStage.Geometry               => StorageDescsBaseOffset + 3 * StorageDescsSize,
-                ShaderStage.Fragment               => StorageDescsBaseOffset + 4 * StorageDescsSize,
-                _ => 0
-            };
-        }
-
-        public static int GetConstantUbeOffset(int slot)
-        {
-            return UbeBaseOffset + slot * StorageDescSize;
-        }
-    }
-}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs
index 206facd467..7dd267f3ce 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs
@@ -19,6 +19,14 @@ namespace Ryujinx.Graphics.Shader.Translation
             _stage = stage;
         }
 
+        // Appends the function to the function list and returns the index it was
+        // stored at, which is used as the function id.
+        public int AddFunction(Function function)
+        {
+            _functionList.Add(function);
+            return _functionList.Count - 1;
+        }
+
         public int GetOrCreateFunctionId(HelperFunctionName functionName)
         {
             if (_functionIds.TryGetValue(functionName, out int functionId))
@@ -27,8 +35,7 @@ namespace Ryujinx.Graphics.Shader.Translation
             }
 
             Function function = GenerateFunction(functionName);
-            functionId = _functionList.Count;
-            _functionList.Add(function);
+            functionId = AddFunction(function);
             _functionIds.Add(functionName, functionId);
 
             return functionId;
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
index 7758b4c617..14904b2607 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
@@ -1,483 +1,1140 @@
 using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
 using System.Collections.Generic;
+using System.Linq;
 
 using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
-using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
 
 namespace Ryujinx.Graphics.Shader.Translation.Optimizations
 {
     static class GlobalToStorage
     {
+        private const int DriverReservedCb = 0;
+
+        enum LsMemoryType // Memory type stored in LsKey.Type and passed to the GtsContext memory target methods.
+        {
+            Local,
+            Shared
+        }
+
+        private class GtsContext
+        {
+            // Describes a helper function generated by this pass: the id it was given,
+            // plus the instruction, storage kind, multi-target flag and target list
+            // that TryGetFunctionId compares against when looking for a reusable helper.
+            private struct Entry
+            {
+                public readonly int FunctionId;
+                public readonly Instruction Inst;
+                public readonly StorageKind StorageKind;
+                public readonly bool IsMultiTarget;
+                public readonly IReadOnlyList<uint> TargetCbs;
+
+                // Plain member-wise initialization; no validation or copying is done,
+                // so the targetCbs list instance is stored as given.
+                public Entry(int functionId, Instruction inst, StorageKind storageKind, bool isMultiTarget, IReadOnlyList<uint> targetCbs)
+                {
+                    TargetCbs = targetCbs;
+                    IsMultiTarget = isMultiTarget;
+                    StorageKind = storageKind;
+                    Inst = inst;
+                    FunctionId = functionId;
+                }
+            }
+
+            // Dictionary key identifying a local or shared memory location by its base
+            // offset operand, its constant offset and its memory type.
+            private struct LsKey : IEquatable<LsKey>
+            {
+                public readonly Operand BaseOffset;
+                public readonly int ConstOffset;
+                public readonly LsMemoryType Type;
+
+                public LsKey(Operand baseOffset, int constOffset, LsMemoryType type)
+                {
+                    Type = type;
+                    ConstOffset = constOffset;
+                    BaseOffset = baseOffset;
+                }
+
+                public override int GetHashCode() => HashCode.Combine(BaseOffset, ConstOffset, Type);
+
+                public override bool Equals(object obj) => obj is LsKey key && Equals(key);
+
+                // Keys are equal when all three members match; the base offset operands
+                // are compared using the == operator.
+                public bool Equals(LsKey other)
+                {
+                    bool sameOffsets = other.BaseOffset == BaseOffset && other.ConstOffset == ConstOffset;
+
+                    return sameOffsets && other.Type == Type;
+                }
+            }
+
+            // Helper functions generated so far, scanned by TryGetFunctionId.
+            private readonly List<Entry> _entries = new List<Entry>();
+            // Per memory location table of results, keyed by the targetCb value they were added with.
+            private readonly Dictionary<LsKey, Dictionary<uint, SearchResult>> _sharedEntries = new Dictionary<LsKey, Dictionary<uint, SearchResult>>();
+            private readonly HelperFunctionManager _hfm;
+
+            public GtsContext(HelperFunctionManager hfm)
+            {
+                _hfm = hfm;
+            }
+
+            // Adds the function through the helper function manager and records which operation
+            // kind and target list it was generated for; returns the id the manager assigned.
+            public int AddFunction(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs, Function function)
+            {
+                Entry entry = new Entry(_hfm.AddFunction(function), baseOp.Inst, baseOp.StorageKind, isMultiTarget, targetCbs);
+                _entries.Add(entry);
+                return entry.FunctionId;
+            }
+
+            public bool TryGetFunctionId(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs, out int functionId)
+            {
+                // A previously generated helper can be reused only when it was created for the
+                // same instruction and storage kind, with the same single/multi target mode, and
+                // for exactly the same ordered list of target constant buffer entries.
+                for (int entryIndex = 0; entryIndex < _entries.Count; entryIndex++)
+                {
+                    Entry candidate = _entries[entryIndex];
+
+                    bool sameSignature =
+                        candidate.Inst == baseOp.Inst &&
+                        candidate.StorageKind == baseOp.StorageKind &&
+                        candidate.IsMultiTarget == isMultiTarget &&
+                        candidate.TargetCbs.Count == targetCbs.Count;
+
+                    if (!sameSignature)
+                    {
+                        continue;
+                    }
+
+                    // Counts are known to match here, so SequenceEqual reduces to an
+                    // element-by-element, order-sensitive comparison of the two lists.
+                    if (candidate.TargetCbs.SequenceEqual(targetCbs))
+                    {
+                        functionId = candidate.FunctionId;
+                        return true;
+                    }
+                }
+
+                // No matching helper function has been registered yet.
+                functionId = -1;
+                return false;
+            }
+
+            public void AddMemoryTargetCb(LsMemoryType type, Operand baseOffset, int constOffset, uint targetCb, SearchResult result)
+            {
+                LsKey location = new LsKey(baseOffset, constOffset, type);
+
+                if (!_sharedEntries.TryGetValue(location, out Dictionary<uint, SearchResult> knownTargets))
+                {
+                    // First result recorded for this memory location: start a new table
+                    // of targets containing only this one.
+
+                    _sharedEntries.Add(location, new Dictionary<uint, SearchResult>() { { targetCb, result } });
+
+                    return;
+                }
+
+                if (!knownTargets.TryGetValue(targetCb, out SearchResult previous))
+                {
+                    // The location is known, but not for this constant buffer region (where the
+                    // storage buffer base address and size come from), so record the result as is.
+
+                    knownTargets.Add(targetCb, result);
+
+                    return;
+                }
+
+                // Location and region are both known. If the stored result still has an offset
+                // and the new result disagrees with it, store a result with a null offset to
+                // indicate multiple possible offsets. Multi-target accesses do not need the offset.
+
+                if (previous.Offset != null &&
+                    (previous.Offset != result.Offset || previous.ConstOffset != result.ConstOffset))
+                {
+                    knownTargets[targetCb] = new SearchResult(result.SbCbSlot, result.SbCbOffset);
+                }
+            }
+
+            public bool TryGetMemoryTargetCb(LsMemoryType type, Operand baseOffset, int constOffset, out SearchResult result)
+            {
+                LsKey location = new LsKey(baseOffset, constOffset, type);
+
+                // Succeeds only for an unambiguous location: exactly one target entry
+                // is recorded for it, and that entry holds a found result.
+                bool hasSingleTarget =
+                    _sharedEntries.TryGetValue(location, out Dictionary<uint, SearchResult> knownTargets) && knownTargets.Count == 1;
+
+                if (!hasSingleTarget)
+                {
+                    result = default;
+                    return false;
+                }
+
+                SearchResult onlyResult = knownTargets.Values.First();
+                result = onlyResult.Found ? onlyResult : default;
+
+                return onlyResult.Found;
+            }
+        }
+
         private struct SearchResult
         {
             public static SearchResult NotFound => new SearchResult(-1, 0);
             public bool Found => SbCbSlot != -1;
             public int SbCbSlot { get; }
             public int SbCbOffset { get; }
+            public Operand Offset { get; }
+            public int ConstOffset { get; }
 
             public SearchResult(int sbCbSlot, int sbCbOffset)
             {
                 SbCbSlot = sbCbSlot;
                 SbCbOffset = sbCbOffset;
             }
+
+            // Same slot/offset initialization as the two-argument constructor, plus the offset operand and constant offset.
+            public SearchResult(int sbCbSlot, int sbCbOffset, Operand offset, int constOffset = 0)
+                : this(sbCbSlot, sbCbOffset)
+            {
+                Offset = offset;
+                ConstOffset = constOffset;
+            }
         }
 
-        public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask)
+        public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
         {
-            int sbStart = GetStorageBaseCbOffset(config.Stage);
-            int sbEnd = sbStart + StorageDescsSize;
+            GtsContext gtsContext = new GtsContext(hfm);
 
-            int ubeStart = UbeBaseOffset;
-            int ubeEnd = UbeBaseOffset + UbeDescsSize;
-
-            for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+            foreach (BasicBlock block in blocks)
             {
-                for (int index = 0; index < node.Value.SourcesCount; index++)
+                for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
                 {
-                    Operand src = node.Value.GetSource(index);
-
-                    int storageIndex = GetStorageIndex(src, sbStart, sbEnd);
-
-                    if (storageIndex >= 0)
-                    {
-                        sbUseMask |= 1 << storageIndex;
-                    }
-
-                    if (config.Stage == ShaderStage.Compute)
-                    {
-                        int constantIndex = GetStorageIndex(src, ubeStart, ubeEnd);
-
-                        if (constantIndex >= 0)
-                        {
-                            ubeUseMask |= 1 << constantIndex;
-                        }
-                    }
-                }
-
-                if (!(node.Value is Operation operation))
-                {
-                    continue;
-                }
-
-                if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
-                {
-                    Operand source = operation.GetSource(0);
-
-                    var result = SearchForStorageBase(config, block, source);
-                    if (!result.Found)
+                    if (!(node.Value is Operation operation))
                     {
                         continue;
                     }
 
-                    if (config.Stage == ShaderStage.Compute &&
-                        operation.Inst == Instruction.LoadGlobal &&
-                        result.SbCbSlot == DriverReservedCb &&
-                        result.SbCbOffset >= UbeBaseOffset &&
-                        result.SbCbOffset < UbeBaseOffset + UbeDescsSize)
+                    if (IsGlobalMemory(operation.StorageKind))
                     {
-                        // Here we effectively try to replace a LDG instruction with LDC.
-                        // The hardware only supports a limited amount of constant buffers
-                        // so NVN "emulates" more constant buffers using global memory access.
-                        // Here we try to replace the global access back to a constant buffer
-                        // load.
-                        node = ReplaceLdgWithLdc(node, config, (result.SbCbOffset - UbeBaseOffset) / StorageDescSize);
-                    }
-                    else
-                    {
-                        // Storage buffers are implemented using global memory access.
-                        // If we know from where the base address of the access is loaded,
-                        // we can guess which storage buffer it is accessing.
-                        // We can then replace the global memory access with a storage
-                        // buffer access.
-                        node = ReplaceGlobalWithStorage(block, node, config, config.GetSbSlot((byte)result.SbCbSlot, (ushort)result.SbCbOffset));
-                    }
-                }
-            }
+                        LinkedListNode<INode> nextNode = ReplaceGlobalMemoryWithStorage(gtsContext, config, block, node);
 
-            config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
-        }
-
-        private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
-        {
-            Operation operation = (Operation)node.Value;
-
-            bool isAtomic = operation.Inst.IsAtomic();
-            bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
-            bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
-
-            config.SetUsedStorageBuffer(storageIndex, isWrite);
-
-            Operand[] sources = new Operand[operation.SourcesCount];
-
-            sources[0] = Const(storageIndex);
-            sources[1] = GetStorageOffset(block, node, config, storageIndex, operation.GetSource(0), isStg16Or8);
-
-            for (int index = 2; index < operation.SourcesCount; index++)
-            {
-                sources[index] = operation.GetSource(index);
-            }
-
-            Operation storageOp;
-
-            if (isAtomic)
-            {
-                storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources);
-            }
-            else if (operation.Inst == Instruction.LoadGlobal)
-            {
-                storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
-            }
-            else
-            {
-                Instruction storeInst = operation.Inst switch
-                {
-                    Instruction.StoreGlobal16 => Instruction.StoreStorage16,
-                    Instruction.StoreGlobal8 => Instruction.StoreStorage8,
-                    _ => Instruction.StoreStorage
-                };
-
-                storageOp = new Operation(storeInst, null, sources);
-            }
-
-            for (int index = 0; index < operation.SourcesCount; index++)
-            {
-                operation.SetSource(index, null);
-            }
-
-            LinkedListNode<INode> oldNode = node;
-
-            node = node.List.AddBefore(node, storageOp);
-
-            node.List.Remove(oldNode);
-
-            return node;
-        }
-
-        private static Operand GetStorageOffset(
-            BasicBlock block,
-            LinkedListNode<INode> node,
-            ShaderConfig config,
-            int storageIndex,
-            Operand addrLow,
-            bool isStg16Or8)
-        {
-            (int sbCbSlot, int sbCbOffset) = config.GetSbCbInfo(storageIndex);
-
-            bool storageAligned = !(config.GpuAccessor.QueryHasUnalignedStorageBuffer() || config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() > Constants.StorageAlignment);
-
-            (Operand byteOffset, int constantOffset) = storageAligned ?
-                GetStorageOffset(block, Utils.FindLastOperation(addrLow, block), sbCbSlot, sbCbOffset) :
-                (null, 0);
-
-            if (byteOffset != null)
-            {
-                ReplaceAddressAlignment(node.List, addrLow, byteOffset, constantOffset);
-            }
-
-            if (byteOffset == null)
-            {
-                Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset);
-                Operand baseAddrTrunc = Local();
-
-                Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
-
-                Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);
-
-                node.List.AddBefore(node, andOp);
-
-                Operand offset = Local();
-                Operation subOp = new Operation(Instruction.Subtract, offset, addrLow, baseAddrTrunc);
-
-                node.List.AddBefore(node, subOp);
-
-                byteOffset = offset;
-            }
-            else if (constantOffset != 0)
-            {
-                Operand offset = Local();
-                Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));
-
-                node.List.AddBefore(node, addOp);
-
-                byteOffset = offset;
-            }
-
-            if (isStg16Or8)
-            {
-                return byteOffset;
-            }
-
-            Operand wordOffset = Local();
-            Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
-
-            node.List.AddBefore(node, shrOp);
-
-            return wordOffset;
-        }
-
-        private static bool IsCbOffset(Operand operand, int slot, int offset)
-        {
-            return operand.Type == OperandType.ConstantBuffer && operand.GetCbufSlot() == slot && operand.GetCbufOffset() == offset;
-        }
-
-        private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset)
-        {
-            // When we emit 16/8-bit LDG, we add extra code to determine the address alignment.
-            // Eliminate the storage buffer base address from this too, leaving only the byte offset.
-
-            foreach (INode useNode in address.UseOps)
-            {
-                if (useNode is Operation op && op.Inst == Instruction.BitwiseAnd)
-                {
-                    Operand src1 = op.GetSource(0);
-                    Operand src2 = op.GetSource(1);
-
-                    int addressIndex = -1;
-
-                    if (src1 == address && src2.Type == OperandType.Constant && src2.Value == 3)
-                    {
-                        addressIndex = 0;
-                    }
-                    else if (src2 == address && src1.Type == OperandType.Constant && src1.Value == 3)
-                    {
-                        addressIndex = 1;
-                    }
-
-                    if (addressIndex != -1)
-                    {
-                        LinkedListNode<INode> node = list.Find(op);
-
-                        // Add offset calculation before the use. Needs to be on the same block.
-                        if (node != null)
+                        if (nextNode == null)
                         {
-                            Operand offset = Local();
-                            Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));
-                            list.AddBefore(node, addOp);
+                            // The returned value being null means that the global memory replacement failed,
+                            // so we just make loads read 0 and stores do nothing.
 
-                            op.SetSource(addressIndex, offset);
+                            config.GpuAccessor.Log($"Failed to reserve storage buffer for global memory operation \"{operation.Inst}\".");
+
+                            if (operation.Dest != null)
+                            {
+                                operation.TurnIntoCopy(Const(0));
+                            }
+                            else
+                            {
+                                Utils.DeleteNode(node, operation);
+                            }
+                        }
+                        else
+                        {
+                            node = nextNode;
+                        }
+                    }
+                    else if (operation.Inst == Instruction.StoreShared || operation.Inst == Instruction.StoreLocal)
+                    {
+                        // The NVIDIA compiler can sometimes use shared or local memory as temporary
+                        // storage to place the base address and size on, so we need
+                        // to be able to find such information stored in memory too.
+
+                        if (TryGetMemoryOffsets(operation, out LsMemoryType type, out Operand baseOffset, out int constOffset))
+                        {
+                            Operand value = operation.GetSource(operation.SourcesCount - 1);
+
+                            var result = FindUniqueBaseAddressCb(gtsContext, block, value, needsOffset: false);
+                            if (result.Found)
+                            {
+                                uint targetCb = PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset);
+                                gtsContext.AddMemoryTargetCb(type, baseOffset, constOffset, targetCb, result);
+                            }
                         }
                     }
                 }
             }
         }
 
-        private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int cbSlot, int baseAddressCbOffset)
+        private static bool IsGlobalMemory(StorageKind storageKind)
         {
-            if (IsCbOffset(address, cbSlot, baseAddressCbOffset))
-            {
-                // Direct offset: zero.
-                return (Const(0), 0);
-            }
-
-            (address, int constantOffset) = GetStorageConstantOffset(block, address);
-
-            address = Utils.FindLastOperation(address, block);
-
-            if (IsCbOffset(address, cbSlot, baseAddressCbOffset))
-            {
-                // Only constant offset
-                return (Const(0), constantOffset);
-            }
-
-            if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add)
-            {
-                return (null, 0);
-            }
-
-            Operand src1 = offsetAdd.GetSource(0);
-            Operand src2 = Utils.FindLastOperation(offsetAdd.GetSource(1), block);
-
-            if (IsCbOffset(src2, cbSlot, baseAddressCbOffset))
-            {
-                return (src1, constantOffset);
-            }
-            else if (IsCbOffset(src1, cbSlot, baseAddressCbOffset))
-            {
-                return (src2, constantOffset);
-            }
-
-            return (null, 0);
+            return storageKind == StorageKind.GlobalMemory ||
+                   storageKind == StorageKind.GlobalMemoryS8 ||
+                   storageKind == StorageKind.GlobalMemoryS16 ||
+                   storageKind == StorageKind.GlobalMemoryU8 ||
+                   storageKind == StorageKind.GlobalMemoryU16;
         }
 
-        private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address)
+        private static bool IsSmallInt(StorageKind storageKind)
         {
-            if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add)
-            {
-                return (address, 0);
-            }
-
-            Operand src1 = offsetAdd.GetSource(0);
-            Operand src2 = offsetAdd.GetSource(1);
-
-            if (src2.Type != OperandType.Constant)
-            {
-                return (address, 0);
-            }
-
-            return (src1, src2.Value);
+            return storageKind == StorageKind.GlobalMemoryS8 ||
+                   storageKind == StorageKind.GlobalMemoryS16 ||
+                   storageKind == StorageKind.GlobalMemoryU8 ||
+                   storageKind == StorageKind.GlobalMemoryU16;
         }
 
-        private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
+        private static LinkedListNode<INode> ReplaceGlobalMemoryWithStorage(
+            GtsContext gtsContext,
+            ShaderConfig config,
+            BasicBlock block,
+            LinkedListNode<INode> node)
         {
-            Operation operation = (Operation)node.Value;
+            Operation operation = node.Value as Operation;
+            Operand globalAddress = operation.GetSource(0);
+            SearchResult result = FindUniqueBaseAddressCb(gtsContext, block, globalAddress, needsOffset: true);
 
-            Operand GetCbufOffset()
+            if (result.Found)
             {
-                Operand addrLow = operation.GetSource(0);
+                // We found the storage buffer that is being accessed.
+                // There are two possible paths here: if the operation is simple enough,
+                // we just generate the storage access code inline.
+                // Otherwise, we generate a function call (and the function if necessary).
 
-                Operand baseAddrLow = Cbuf(0, UbeBaseOffset + storageIndex * StorageDescSize);
+                Operand offset = result.Offset;
 
-                Operand baseAddrTrunc = Local();
+                bool storageUnaligned = config.GpuAccessor.QueryHasUnalignedStorageBuffer();
 
-                Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
+                if (storageUnaligned)
+                {
+                    Operand baseAddress = Cbuf(result.SbCbSlot, result.SbCbOffset);
 
-                Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);
+                    Operand baseAddressMasked = Local();
+                    Operand hostOffset = Local();
 
-                node.List.AddBefore(node, andOp);
+                    int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment();
 
-                Operand byteOffset = Local();
-                Operand wordOffset = Local();
+                    Operation maskOp = new Operation(Instruction.BitwiseAnd, baseAddressMasked, new[] { baseAddress, Const(-alignment) });
+                    Operation subOp = new Operation(Instruction.Subtract, hostOffset, new[] { globalAddress, baseAddressMasked });
 
-                Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
-                Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
+                    node.List.AddBefore(node, maskOp);
+                    node.List.AddBefore(node, subOp);
 
-                node.List.AddBefore(node, subOp);
-                node.List.AddBefore(node, shrOp);
+                    offset = hostOffset;
+                }
+                else if (result.ConstOffset != 0)
+                {
+                    Operand newOffset = Local();
 
-                return wordOffset;
+                    Operation addOp = new Operation(Instruction.Add, newOffset, new[] { offset, Const(result.ConstOffset) });
+
+                    node.List.AddBefore(node, addOp);
+
+                    offset = newOffset;
+                }
+
+                if (CanUseInlineStorageOp(operation, config.Options.TargetLanguage))
+                {
+                    return GenerateInlineStorageOp(config, node, operation, offset, result);
+                }
+                else
+                {
+                    if (!TryGenerateSingleTargetStorageOp(gtsContext, config, operation, result, out int functionId))
+                    {
+                        return null;
+                    }
+
+                    return GenerateCallStorageOp(node, operation, offset, functionId);
+                }
             }
-
-            Operand cbufOffset = GetCbufOffset();
-            Operand vecIndex = Local();
-            Operand elemIndex = Local();
-
-            node.List.AddBefore(node, new Operation(Instruction.ShiftRightU32, 0, vecIndex, cbufOffset, Const(2)));
-            node.List.AddBefore(node, new Operation(Instruction.BitwiseAnd, 0, elemIndex, cbufOffset, Const(3)));
-
-            Operand[] sources = new Operand[4];
-
-            int cbSlot = UbeFirstCbuf + storageIndex;
-
-            sources[0] = Const(config.ResourceManager.GetConstantBufferBinding(cbSlot));
-            sources[1] = Const(0);
-            sources[2] = vecIndex;
-            sources[3] = elemIndex;
-
-            Operation ldcOp = new Operation(Instruction.Load, StorageKind.ConstantBuffer, operation.Dest, sources);
-
-            for (int index = 0; index < operation.SourcesCount; index++)
+            else
             {
-                operation.SetSource(index, null);
+                // Failed to find the storage buffer directly.
+                // Try to walk through Phi chains and find all possible constant buffers where
+                // the base address might be stored.
+                // Generate a helper function that will check all possible storage buffers and use the right one.
+
+                if (!TryGenerateMultiTargetStorageOp(gtsContext, config, block, operation, out int functionId))
+                {
+                    return null;
+                }
+
+                return GenerateCallStorageOp(node, operation, null, functionId);
             }
-
-            LinkedListNode<INode> oldNode = node;
-
-            node = node.List.AddBefore(node, ldcOp);
-
-            node.List.Remove(oldNode);
-
-            return node;
         }
 
-        private static SearchResult SearchForStorageBase(ShaderConfig config, BasicBlock block, Operand globalAddress)
+        private static bool CanUseInlineStorageOp(Operation operation, TargetLanguage targetLanguage)
+        {
+            if (operation.StorageKind != StorageKind.GlobalMemory)
+            {
+                return false;
+            }
+
+            return (operation.Inst != Instruction.AtomicMaxS32 &&
+                    operation.Inst != Instruction.AtomicMinS32) || targetLanguage == TargetLanguage.Spirv;
+        }
+
+        private static LinkedListNode<INode> GenerateInlineStorageOp(
+            ShaderConfig config,
+            LinkedListNode<INode> node,
+            Operation operation,
+            Operand offset,
+            SearchResult result)
+        {
+            bool isStore = operation.Inst == Instruction.Store || operation.Inst.IsAtomic();
+            if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
+            {
+                return null;
+            }
+
+            Operand wordOffset = Local();
+
+            Operand[] sources;
+
+            if (operation.Inst == Instruction.AtomicCompareAndSwap)
+            {
+                sources = new Operand[]
+                {
+                    Const(binding),
+                    Const(0),
+                    wordOffset,
+                    operation.GetSource(operation.SourcesCount - 2),
+                    operation.GetSource(operation.SourcesCount - 1)
+                };
+            }
+            else if (isStore)
+            {
+                sources = new Operand[] { Const(binding), Const(0), wordOffset, operation.GetSource(operation.SourcesCount - 1) };
+            }
+            else
+            {
+                sources = new Operand[] { Const(binding), Const(0), wordOffset };
+            }
+
+            Operation shiftOp = new Operation(Instruction.ShiftRightU32, wordOffset, new[] { offset, Const(2) });
+            Operation storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources);
+
+            node.List.AddBefore(node, shiftOp);
+            LinkedListNode<INode> newNode = node.List.AddBefore(node, storageOp);
+
+            Utils.DeleteNode(node, operation);
+
+            return newNode;
+        }
+
+        private static LinkedListNode<INode> GenerateCallStorageOp(LinkedListNode<INode> node, Operation operation, Operand offset, int functionId)
+        {
+            // Generate call to a helper function that will perform the storage buffer operation.
+
+            Operand[] sources = new Operand[operation.SourcesCount - 1 + (offset == null ? 2 : 1)];
+
+            sources[0] = Const(functionId);
+
+            if (offset != null)
+            {
+                // If the offset was supplied, we use that and skip the global address.
+
+                sources[1] = offset;
+
+                for (int srcIndex = 2; srcIndex < operation.SourcesCount; srcIndex++)
+                {
+                    sources[srcIndex] = operation.GetSource(srcIndex);
+                }
+            }
+            else
+            {
+                // Use the 64-bit global address, which is split into two 32-bit arguments.
+
+                for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+                {
+                    sources[srcIndex + 1] = operation.GetSource(srcIndex);
+                }
+            }
+
+            bool returnsValue = operation.Dest != null;
+            Operand returnValue = returnsValue ? Local() : null;
+
+            Operation callOp = new Operation(Instruction.Call, returnValue, sources);
+
+            LinkedListNode<INode> newNode = node.List.AddBefore(node, callOp);
+
+            if (returnsValue)
+            {
+                operation.TurnIntoCopy(returnValue);
+
+                return node;
+            }
+            else
+            {
+                Utils.DeleteNode(node, operation);
+
+                return newNode;
+            }
+        }
+
+        private static bool TryGenerateSingleTargetStorageOp(
+            GtsContext gtsContext,
+            ShaderConfig config,
+            Operation operation,
+            SearchResult result,
+            out int functionId)
+        {
+            List<uint> targetCbs = new List<uint>() { PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset) };
+
+            if (gtsContext.TryGetFunctionId(operation, isMultiTarget: false, targetCbs, out functionId))
+            {
+                return true;
+            }
+
+            int inArgumentsCount = 1;
+
+            if (operation.Inst == Instruction.AtomicCompareAndSwap)
+            {
+                inArgumentsCount = 3;
+            }
+            else if (operation.Inst == Instruction.Store || operation.Inst.IsAtomic())
+            {
+                inArgumentsCount = 2;
+            }
+
+            EmitterContext context = new EmitterContext();
+
+            Operand offset = Argument(0);
+            Operand compare = null;
+            Operand value = null;
+
+            if (inArgumentsCount == 3)
+            {
+                compare = Argument(1);
+                value = Argument(2);
+            }
+            else if (inArgumentsCount == 2)
+            {
+                value = Argument(1);
+            }
+
+            if (!TryGenerateStorageOp(
+                config,
+                context,
+                operation.Inst,
+                operation.StorageKind,
+                offset,
+                compare,
+                value,
+                result,
+                out Operand resultValue))
+            {
+                functionId = 0;
+                return false;
+            }
+
+            bool returnsValue = resultValue != null;
+
+            if (returnsValue)
+            {
+                context.Return(resultValue);
+            }
+            else
+            {
+                context.Return();
+            }
+
+            string functionName = GetFunctionName(operation, isMultiTarget: false, targetCbs);
+
+            Function function = new Function(
+                ControlFlowGraph.Create(context.GetOperations()).Blocks,
+                functionName,
+                returnsValue,
+                inArgumentsCount,
+                0);
+
+            functionId = gtsContext.AddFunction(operation, isMultiTarget: false, targetCbs, function);
+
+            return true;
+        }
+
+        private static bool TryGenerateMultiTargetStorageOp(
+            GtsContext gtsContext,
+            ShaderConfig config,
+            BasicBlock block,
+            Operation operation,
+            out int functionId)
+        {
+            Queue<PhiNode> phis = new Queue<PhiNode>();
+            HashSet<PhiNode> visited = new HashSet<PhiNode>();
+            List<uint> targetCbs = new List<uint>();
+
+            Operand globalAddress = operation.GetSource(0);
+
+            if (globalAddress.AsgOp is Operation addOp && addOp.Inst == Instruction.Add)
+            {
+                Operand src1 = addOp.GetSource(0);
+                Operand src2 = addOp.GetSource(1);
+
+                if (src1.Type == OperandType.Constant && src2.Type == OperandType.LocalVariable)
+                {
+                    globalAddress = src2;
+                }
+                else if (src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant)
+                {
+                    globalAddress = src1;
+                }
+            }
+
+            if (globalAddress.AsgOp is PhiNode phi && visited.Add(phi))
+            {
+                phis.Enqueue(phi);
+            }
+            else
+            {
+                SearchResult result = FindUniqueBaseAddressCb(gtsContext, block, operation.GetSource(0), needsOffset: false);
+
+                if (result.Found)
+                {
+                    targetCbs.Add(PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset));
+                }
+            }
+
+            while (phis.TryDequeue(out phi))
+            {
+                for (int srcIndex = 0; srcIndex < phi.SourcesCount; srcIndex++)
+                {
+                    BasicBlock phiBlock = phi.GetBlock(srcIndex);
+                    Operand phiSource = phi.GetSource(srcIndex);
+
+                    SearchResult result = FindUniqueBaseAddressCb(gtsContext, phiBlock, phiSource, needsOffset: false);
+
+                    if (result.Found)
+                    {
+                        uint targetCb = PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset);
+
+                        if (!targetCbs.Contains(targetCb))
+                        {
+                            targetCbs.Add(targetCb);
+                        }
+                    }
+                    else if (phiSource.AsgOp is PhiNode phi2 && visited.Add(phi2))
+                    {
+                        phis.Enqueue(phi2);
+                    }
+                }
+            }
+
+            targetCbs.Sort();
+
+            if (targetCbs.Count == 0)
+            {
+                config.GpuAccessor.Log($"Failed to find storage buffer for global memory operation \"{operation.Inst}\".");
+            }
+
+            if (gtsContext.TryGetFunctionId(operation, isMultiTarget: true, targetCbs, out functionId))
+            {
+                return true;
+            }
+
+            int inArgumentsCount = 2;
+
+            if (operation.Inst == Instruction.AtomicCompareAndSwap)
+            {
+                inArgumentsCount = 4;
+            }
+            else if (operation.Inst == Instruction.Store || operation.Inst.IsAtomic())
+            {
+                inArgumentsCount = 3;
+            }
+
+            EmitterContext context = new EmitterContext();
+
+            Operand globalAddressLow = Argument(0);
+            Operand globalAddressHigh = Argument(1);
+
+            foreach (uint targetCb in targetCbs)
+            {
+                (int sbCbSlot, int sbCbOffset) = UnpackCbSlotAndOffset(targetCb);
+
+                Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset);
+                Operand baseAddrHigh = Cbuf(sbCbSlot, sbCbOffset + 1);
+                Operand size = Cbuf(sbCbSlot, sbCbOffset + 2);
+
+                Operand offset = context.ISubtract(globalAddressLow, baseAddrLow);
+                Operand borrow = context.ICompareLessUnsigned(globalAddressLow, baseAddrLow);
+
+                Operand inRangeLow = context.ICompareLessUnsigned(offset, size);
+
+                Operand addrHighBorrowed = context.IAdd(globalAddressHigh, borrow);
+
+                Operand inRangeHigh = context.ICompareEqual(addrHighBorrowed, baseAddrHigh);
+
+                Operand inRange = context.BitwiseAnd(inRangeLow, inRangeHigh);
+
+                Operand lblSkip = Label();
+                context.BranchIfFalse(lblSkip, inRange);
+
+                Operand compare = null;
+                Operand value = null;
+
+                if (inArgumentsCount == 4)
+                {
+                    compare = Argument(2);
+                    value = Argument(3);
+                }
+                else if (inArgumentsCount == 3)
+                {
+                    value = Argument(2);
+                }
+
+                SearchResult result = new SearchResult(sbCbSlot, sbCbOffset);
+
+                int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment();
+
+                Operand baseAddressMasked = context.BitwiseAnd(baseAddrLow, Const(-alignment));
+                Operand hostOffset = context.ISubtract(globalAddressLow, baseAddressMasked);
+
+                if (!TryGenerateStorageOp(
+                    config,
+                    context,
+                    operation.Inst,
+                    operation.StorageKind,
+                    hostOffset,
+                    compare,
+                    value,
+                    result,
+                    out Operand resultValue))
+                {
+                    functionId = 0;
+                    return false;
+                }
+
+                if (resultValue != null)
+                {
+                    context.Return(resultValue);
+                }
+                else
+                {
+                    context.Return();
+                }
+
+                context.MarkLabel(lblSkip);
+            }
+
+            bool returnsValue = operation.Dest != null;
+
+            if (returnsValue)
+            {
+                context.Return(Const(0));
+            }
+            else
+            {
+                context.Return();
+            }
+
+            string functionName = GetFunctionName(operation, isMultiTarget: true, targetCbs);
+
+            Function function = new Function(
+                ControlFlowGraph.Create(context.GetOperations()).Blocks,
+                functionName,
+                returnsValue,
+                inArgumentsCount,
+                0);
+
+            functionId = gtsContext.AddFunction(operation, isMultiTarget: true, targetCbs, function);
+
+            return true;
+        }
+
+        private static uint PackCbSlotAndOffset(int cbSlot, int cbOffset)
+        {
+            return (uint)((ushort)cbSlot | ((ushort)cbOffset << 16));
+        }
+
+        private static (int, int) UnpackCbSlotAndOffset(uint packed)
+        {
+            return ((ushort)packed, (ushort)(packed >> 16));
+        }
+
+        private static string GetFunctionName(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs)
+        {
+            string name = baseOp.Inst.ToString();
+
+            name += baseOp.StorageKind switch
+            {
+                StorageKind.GlobalMemoryS8 => "S8",
+                StorageKind.GlobalMemoryS16 => "S16",
+                StorageKind.GlobalMemoryU8 => "U8",
+                StorageKind.GlobalMemoryU16 => "U16",
+                _ => string.Empty
+            };
+
+            if (isMultiTarget)
+            {
+                name += "Multi";
+            }
+
+            foreach (uint targetCb in targetCbs)
+            {
+                (int sbCbSlot, int sbCbOffset) = UnpackCbSlotAndOffset(targetCb);
+
+                name += $"_c{sbCbSlot}o{sbCbOffset}";
+            }
+
+            return name;
+        }
+
+        private static bool TryGenerateStorageOp(
+            ShaderConfig config,
+            EmitterContext context,
+            Instruction inst,
+            StorageKind storageKind,
+            Operand offset,
+            Operand compare,
+            Operand value,
+            SearchResult result,
+            out Operand resultValue)
+        {
+            resultValue = null;
+            bool isStore = inst.IsAtomic() || inst == Instruction.Store;
+
+            if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
+            {
+                return false;
+            }
+
+            Operand wordOffset = context.ShiftRightU32(offset, Const(2));
+
+            if (inst.IsAtomic())
+            {
+                if (IsSmallInt(storageKind))
+                {
+                    throw new NotImplementedException();
+                }
+
+                switch (inst)
+                {
+                    case Instruction.AtomicAdd:
+                        resultValue = context.AtomicAdd(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
+                        break;
+                    case Instruction.AtomicAnd:
+                        resultValue = context.AtomicAnd(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
+                        break;
+                    case Instruction.AtomicCompareAndSwap:
+                        resultValue = context.AtomicCompareAndSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, compare, value);
+                        break;
+                    case Instruction.AtomicMaxS32:
+                        if (config.Options.TargetLanguage == TargetLanguage.Spirv)
+                        {
+                            resultValue = context.AtomicMaxS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
+                        }
+                        else
+                        {
+                            resultValue = GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) =>
+                            {
+                                return context.IMaximumS32(memValue, value);
+                            });
+                        }
+                        break;
+                    case Instruction.AtomicMaxU32:
+                        resultValue = context.AtomicMaxU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
+                        break;
+                    case Instruction.AtomicMinS32:
+                        if (config.Options.TargetLanguage == TargetLanguage.Spirv)
+                        {
+                            resultValue = context.AtomicMinS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
+                        }
+                        else
+                        {
+                            resultValue = GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) =>
+                            {
+                                return context.IMinimumS32(memValue, value);
+                            });
+                        }
+                        break;
+                    case Instruction.AtomicMinU32:
+                        resultValue = context.AtomicMinU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
+                        break;
+                    case Instruction.AtomicOr:
+                        resultValue = context.AtomicOr(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
+                        break;
+                    case Instruction.AtomicSwap:
+                        resultValue = context.AtomicSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
+                        break;
+                    case Instruction.AtomicXor:
+                        resultValue = context.AtomicXor(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
+                        break;
+                }
+            }
+            else if (inst == Instruction.Store)
+            {
+                int bitSize = storageKind switch
+                {
+                    StorageKind.GlobalMemoryS8 or
+                    StorageKind.GlobalMemoryU8 => 8,
+                    StorageKind.GlobalMemoryS16 or
+                    StorageKind.GlobalMemoryU16 => 16,
+                    _ => 32
+                };
+
+                if (bitSize < 32)
+                {
+                    Operand bitOffset = GetBitOffset(context, offset);
+
+                    GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) =>
+                    {
+                        return context.BitfieldInsert(memValue, value, bitOffset, Const(bitSize));
+                    });
+                }
+                else
+                {
+                    context.Store(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
+                }
+            }
+            else
+            {
+                value = context.Load(StorageKind.StorageBuffer, binding, Const(0), wordOffset);
+
+                if (IsSmallInt(storageKind))
+                {
+                    Operand bitOffset = GetBitOffset(context, offset);
+
+                    switch (storageKind)
+                    {
+                        case StorageKind.GlobalMemoryS8:
+                            value = context.ShiftRightS32(value, bitOffset);
+                            value = context.BitfieldExtractS32(value, Const(0), Const(8));
+                            break;
+                        case StorageKind.GlobalMemoryS16:
+                            value = context.ShiftRightS32(value, bitOffset);
+                            value = context.BitfieldExtractS32(value, Const(0), Const(16));
+                            break;
+                        case StorageKind.GlobalMemoryU8:
+                            value = context.ShiftRightU32(value, bitOffset);
+                            value = context.BitwiseAnd(value, Const(byte.MaxValue));
+                            break;
+                        case StorageKind.GlobalMemoryU16:
+                            value = context.ShiftRightU32(value, bitOffset);
+                            value = context.BitwiseAnd(value, Const(ushort.MaxValue));
+                            break;
+                    }
+                }
+
+                resultValue = value;
+            }
+
+            return true;
+        }
+
+        private static Operand GetBitOffset(EmitterContext context, Operand offset)
+        {
+            return context.ShiftLeft(context.BitwiseAnd(offset, Const(3)), Const(3)); // (offset & 3) << 3: the low two bits of the offset, multiplied by 8.
+        }
+
+        private static Operand GenerateAtomicCasLoop(EmitterContext context, Operand wordOffset, int binding, Func<Operand, Operand> opCallback)
+        {
+            Operand lblLoopHead = Label();
+
+            context.MarkLabel(lblLoopHead); // Loop head: execution branches back here when the compare-and-swap below fails.
+
+            Operand oldValue = context.Load(StorageKind.StorageBuffer, binding, Const(0), wordOffset); // Current contents of the word.
+            Operand newValue = opCallback(oldValue); // Caller-supplied transform of the loaded value.
+
+            Operand casResult = context.AtomicCompareAndSwap(
+                StorageKind.StorageBuffer,
+                binding,
+                Const(0),
+                wordOffset,
+                oldValue,
+                newValue);
+
+            Operand casFail = context.ICompareNotEqual(casResult, oldValue); // A result different from the loaded value is treated as failure.
+
+            context.BranchIfTrue(lblLoopHead, casFail); // Retry with a fresh load.
+
+            return oldValue; // Operand holding the value loaded at the loop head.
+        }
+
+        private static SearchResult FindUniqueBaseAddressCb(GtsContext gtsContext, BasicBlock block, Operand globalAddress, bool needsOffset)
         {
             globalAddress = Utils.FindLastOperation(globalAddress, block);
 
             if (globalAddress.Type == OperandType.ConstantBuffer)
             {
-                return GetStorageIndex(config, globalAddress);
+                return GetBaseAddressCbWithOffset(globalAddress, Const(0), 0); // The address is the constant buffer value itself, so the offset is zero.
             }
 
             Operation operation = globalAddress.AsgOp as Operation;
 
             if (operation == null || operation.Inst != Instruction.Add)
             {
-                return SearchResult.NotFound;
+                return FindBaseAddressCbFromMemory(gtsContext, operation, 0, needsOffset); // Not an Add: fall back to the shared/local memory lookup (which handles null).
             }
 
             Operand src1 = operation.GetSource(0);
             Operand src2 = operation.GetSource(1);
 
+            int constOffset = 0;
+
             if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) ||
                 (src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant))
             {
                 Operand baseAddr;
+                Operand offset;
 
                 if (src1.Type == OperandType.LocalVariable)
                 {
                     baseAddr = Utils.FindLastOperation(src1, block);
+                    offset = src2;
                 }
                 else
                 {
                     baseAddr = Utils.FindLastOperation(src2, block);
+                    offset = src1;
                 }
 
-                var result = GetStorageIndex(config, baseAddr);
+                var result = GetBaseAddressCbWithOffset(baseAddr, offset, 0);
                 if (result.Found)
                 {
                     return result;
                 }
 
+                constOffset = offset.Value; // Carry the constant addend while looking one operation further up.
                 operation = baseAddr.AsgOp as Operation;
 
                 if (operation == null || operation.Inst != Instruction.Add)
                 {
-                    return SearchResult.NotFound;
+                    return FindBaseAddressCbFromMemory(gtsContext, operation, constOffset, needsOffset);
                 }
             }
 
-            var selectedResult = SearchResult.NotFound;
+            src1 = operation.GetSource(0);
+            src2 = operation.GetSource(1);
 
-            for (int index = 0; index < operation.SourcesCount; index++)
+            // If we have two possible results, we give preference to the ones from
+            // the driver reserved constant buffer, as those are the ones that
+            // contain the base address.
+
+            // If both are constant buffers, give preference to the second operand,
+            // because constant buffers are always encoded as the second operand,
+            // so the second operand will always be the one from the last instruction.
+
+            if (src1.Type != OperandType.ConstantBuffer ||
+                (src1.Type == OperandType.ConstantBuffer && src2.Type == OperandType.ConstantBuffer) ||
+                (src2.Type == OperandType.ConstantBuffer && src2.GetCbufSlot() == DriverReservedCb))
             {
-                Operand source = operation.GetSource(index);
-
-                var result = GetStorageIndex(config, source);
-
-                // If we already have a result, we give preference to the ones from
-                // the driver reserved constant buffer, as those are the ones that
-                // contains the base address.
-                if (result.Found && (!selectedResult.Found || result.SbCbSlot == GlobalMemory.DriverReservedCb))
-                {
-                    selectedResult = result;
-                }
+                return GetBaseAddressCbWithOffset(src2, src1, constOffset); // src2 is the base address candidate, src1 the offset.
             }
 
-            return selectedResult;
+            return GetBaseAddressCbWithOffset(src1, src2, constOffset); // src1 is the base address candidate, src2 the offset.
         }
 
-        private static SearchResult GetStorageIndex(ShaderConfig config, Operand operand)
+        private static SearchResult FindBaseAddressCbFromMemory(GtsContext gtsContext, Operation operation, int constOffset, bool needsOffset)
         {
-            if (operand.Type == OperandType.ConstantBuffer)
+            if (operation != null) // A null operation falls through to NotFound.
             {
-                int slot = operand.GetCbufSlot();
-                int offset = operand.GetCbufOffset();
-
-                if ((offset & 3) == 0)
+                if (TryGetMemoryOffsets(operation, out LsMemoryType type, out Operand bo, out int co) &&
+                    gtsContext.TryGetMemoryTargetCb(type, bo, co, out SearchResult result) &&
+                    (result.Offset != null || !needsOffset)) // Reject results without an offset operand when the caller needs one.
                 {
-                    return new SearchResult(slot, offset);
+                    if (constOffset != 0)
+                    {
+                        return new SearchResult(
+                            result.SbCbSlot,
+                            result.SbCbOffset,
+                            result.Offset,
+                            result.ConstOffset + constOffset); // Fold the caller's constant addend into the result.
+                    }
+
+                    return result;
                 }
             }
 
             return SearchResult.NotFound;
         }
 
-        private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd)
+        private static SearchResult GetBaseAddressCbWithOffset(Operand baseAddress, Operand offset, int constOffset)
         {
-            if (operand.Type == OperandType.ConstantBuffer)
+            if (baseAddress.Type == OperandType.ConstantBuffer)
             {
-                int slot = operand.GetCbufSlot();
-                int offset = operand.GetCbufOffset();
+                int sbCbSlot = baseAddress.GetCbufSlot();
+                int sbCbOffset = baseAddress.GetCbufOffset();
 
-                if (slot == 0 && offset >= sbStart && offset < sbEnd)
+                // We require the constant buffer offset to be even (64-bit aligned, assuming 32-bit word units),
+                // since the address size is 64-bit and the GPU only supports aligned memory access.
+                if ((sbCbOffset & 1) == 0)
                 {
-                    int storageIndex = (offset - sbStart) / StorageDescSize;
-
-                    return storageIndex;
+                    return new SearchResult(sbCbSlot, sbCbOffset, offset, constOffset);
                 }
             }
 
-            return -1;
+            return SearchResult.NotFound;
+        }
+
+        private static bool TryGetMemoryOffsets(Operation operation, out LsMemoryType type, out Operand baseOffset, out int constOffset)
+        {
+            baseOffset = null;
+
+            if (operation.Inst == Instruction.LoadShared || operation.Inst == Instruction.StoreShared)
+            {
+                type = LsMemoryType.Shared;
+                return TryGetSharedMemoryOffsets(operation, out baseOffset, out constOffset);
+            }
+            else if (operation.Inst == Instruction.LoadLocal || operation.Inst == Instruction.StoreLocal)
+            {
+                type = LsMemoryType.Local;
+                return TryGetLocalMemoryOffset(operation, out constOffset); // Local accesses only yield a constant offset; baseOffset stays null.
+            }
+
+            type = default; // Neither a shared nor a local memory access.
+            constOffset = 0;
+            return false;
+        }
+
+        private static bool TryGetSharedMemoryOffsets(Operation operation, out Operand baseOffset, out int constOffset)
+        {
+            baseOffset = null;
+            constOffset = 0;
+
+            // The byte offset is right shifted by 2 to get the 32-bit word offset,
+            // so we want to get the byte offset back, since each one of those word
+            // offsets is a new "local variable" which will not match.
+
+            if (operation.GetSource(0).AsgOp is Operation shiftRightOp &&
+                shiftRightOp.Inst == Instruction.ShiftRightU32 &&
+                shiftRightOp.GetSource(1).Type == OperandType.Constant &&
+                shiftRightOp.GetSource(1).Value == 2)
+            {
+                baseOffset = shiftRightOp.GetSource(0); // The operand that was shifted, i.e. the byte offset.
+            }
+
+            // Check if we have a constant offset being added to the base offset.
+
+            if (baseOffset?.AsgOp is Operation addOp && addOp.Inst == Instruction.Add)
+            {
+                Operand src1 = addOp.GetSource(0);
+                Operand src2 = addOp.GetSource(1);
+
+                if (src1.Type == OperandType.Constant && src2.Type == OperandType.LocalVariable)
+                {
+                    constOffset = src1.Value;
+                    baseOffset = src2;
+                }
+                else if (src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant)
+                {
+                    baseOffset = src1;
+                    constOffset = src2.Value;
+                }
+            }
+
+            return baseOffset != null && baseOffset.Type == OperandType.LocalVariable; // Succeeds only when the base ends up being a local variable.
+        }
+
+        private static bool TryGetLocalMemoryOffset(Operation operation, out int constOffset)
+        {
+            if (operation.GetSource(0).Type == OperandType.Constant) // Only a constant first source is accepted.
+            {
+                constOffset = operation.GetSource(0).Value;
+                return true;
+            }
+
+            constOffset = 0;
+            return false;
         }
     }
 }
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
index b126e2c481..bdb3a62ece 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
@@ -7,17 +7,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
 {
     static class Optimizer
     {
-        public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
+        public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
         {
             RunOptimizationPasses(blocks, config);
 
-            int sbUseMask = 0;
-            int ubeUseMask = 0;
+            GlobalToStorage.RunPass(hfm, blocks, config);
 
             // Those passes are looking for specific patterns and only needs to run once.
             for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
             {
-                GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask, ref ubeUseMask);
                 BindlessToIndexed.RunPass(blocks[blkIndex], config);
                 BindlessElimination.RunPass(blocks[blkIndex], config);
 
@@ -28,8 +26,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
                 }
             }
 
-            config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
-
             // Run optimizations one last time to remove any code that is now optimizable after above passes.
             RunOptimizationPasses(blocks, config);
         }
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs
index 8d05f99afa..9b78c8aaa5 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs
@@ -13,7 +13,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
             switch (operation.Inst)
             {
                 case Instruction.Add:
-                case Instruction.BitwiseExclusiveOr:
                     TryEliminateBinaryOpCommutative(operation, 0);
                     break;
 
@@ -21,6 +20,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
                     TryEliminateBitwiseAnd(operation);
                     break;
 
+                case Instruction.BitwiseExclusiveOr:
+                    if (!TryEliminateXorSwap(operation))
+                    {
+                        TryEliminateBinaryOpCommutative(operation, 0);
+                    }
+                    break;
+
                 case Instruction.BitwiseOr:
                     TryEliminateBitwiseOr(operation);
                     break;
@@ -49,8 +55,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
         private static void TryEliminateBitwiseAnd(Operation operation)
         {
             // Try to recognize and optimize those 3 patterns (in order):
-            // x & 0xFFFFFFFF == x,          0xFFFFFFFF & y == y,
-            // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
+            //  x & 0xFFFFFFFF == x,          0xFFFFFFFF & y == y,
+            //  x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
+
             Operand x = operation.GetSource(0);
             Operand y = operation.GetSource(1);
 
@@ -68,11 +75,62 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
             }
         }
 
+        private static bool TryEliminateXorSwap(Operation xCopyOp)
+        {
+            // Try to recognize the XOR swap pattern:
+            //  x = x ^ y
+            //  y = x ^ y
+            //  x = x ^ y
+            // Or, in SSA:
+            //  x2 = x ^ y
+            //  y2 = x2 ^ y
+            //  x3 = x2 ^ y2
+            // Transform it into something more sane:
+            //  temp = y
+            //  y = x
+            //  x = temp
+
+            // Note that because XOR is commutative, there are actually
+            // multiple possible combinations of this pattern; for
+            // simplicity this only catches one of them.
+
+            Operand x = xCopyOp.GetSource(0); // x2 in the SSA form above.
+            Operand y = xCopyOp.GetSource(1); // y2 in the SSA form above.
+
+            if (x.AsgOp is not Operation tCopyOp || tCopyOp.Inst != Instruction.BitwiseExclusiveOr ||
+                y.AsgOp is not Operation yCopyOp || yCopyOp.Inst != Instruction.BitwiseExclusiveOr)
+            {
+                return false;
+            }
+
+            if (tCopyOp == yCopyOp) // Both operands are defined by the same XOR, which is not the swap pattern.
+            {
+                return false;
+            }
+
+            if (yCopyOp.GetSource(0) != x ||
+                yCopyOp.GetSource(1) != tCopyOp.GetSource(1) ||
+                x.UseOps.Count != 2) // x2 must have exactly two users.
+            {
+                return false;
+            }
+
+            x = tCopyOp.GetSource(0); // Original x.
+            y = tCopyOp.GetSource(1); // Original y.
+
+            tCopyOp.TurnIntoCopy(y); // Temp = Y
+            yCopyOp.TurnIntoCopy(x); // Y = X
+            xCopyOp.TurnIntoCopy(tCopyOp.Dest); // X = Temp
+
+            return true;
+        }
+
         private static void TryEliminateBitwiseOr(Operation operation)
         {
             // Try to recognize and optimize those 3 patterns (in order):
-            // x | 0x00000000 == x,          0x00000000 | y == y,
-            // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
+            //  x | 0x00000000 == x,          0x00000000 | y == y,
+            //  x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
+
             Operand x = operation.GetSource(0);
             Operand y = operation.GetSource(1);
 
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs
index a0d58d0793..ffbd16f853 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs
@@ -1,4 +1,5 @@
 using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
 
 namespace Ryujinx.Graphics.Shader.Translation.Optimizations
 {
@@ -93,5 +94,17 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
 
             return source;
         }
+
+        public static void DeleteNode(LinkedListNode<INode> node, Operation operation)
+        {
+            node.List.Remove(node); // Unlink the node from the list that owns it.
+
+            for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+            {
+                operation.SetSource(srcIndex, null); // Clear each source; presumably this also drops the operation from the operand's use list (TODO confirm).
+            }
+
+            operation.Dest = null; // Detach the destination operand as well.
+        }
     }
 }
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs
index a2cfbe227a..2d19a5a700 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs
@@ -14,6 +14,11 @@ namespace Ryujinx.Graphics.Shader.Translation
         private readonly string _stagePrefix;
 
         private readonly int[] _cbSlotToBindingMap;
+        private readonly int[] _sbSlotToBindingMap;
+        private uint _sbSlotWritten;
+
+        private readonly Dictionary<int, int> _sbSlots;
+        private readonly Dictionary<int, int> _sbSlotsReverse;
 
         private readonly HashSet<int> _usedConstantBufferBindings;
 
@@ -26,7 +31,12 @@ namespace Ryujinx.Graphics.Shader.Translation
             _stagePrefix = GetShaderStagePrefix(stage);
 
             _cbSlotToBindingMap = new int[18];
+            _sbSlotToBindingMap = new int[16];
             _cbSlotToBindingMap.AsSpan().Fill(-1);
+            _sbSlotToBindingMap.AsSpan().Fill(-1);
+
+            _sbSlots = new Dictionary<int, int>();
+            _sbSlotsReverse = new Dictionary<int, int>();
 
             _usedConstantBufferBindings = new HashSet<int>();
 
@@ -47,6 +57,52 @@ namespace Ryujinx.Graphics.Shader.Translation
             return binding;
         }
 
+        public bool TryGetStorageBufferBinding(int sbCbSlot, int sbCbOffset, bool write, out int binding)
+        {
+            if (!TryGetSbSlot((byte)sbCbSlot, (ushort)sbCbOffset, out int slot)) // Fails once every entry of the slot map is taken.
+            {
+                binding = 0;
+                return false;
+            }
+
+            binding = _sbSlotToBindingMap[slot];
+
+            if (binding < 0) // Negative means unassigned: the map is filled with -1 in the constructor.
+            {
+                binding = _gpuAccessor.QueryBindingStorageBuffer(slot);
+                _sbSlotToBindingMap[slot] = binding;
+                string slotNumber = slot.ToString(CultureInfo.InvariantCulture);
+                AddNewStorageBuffer(binding, $"{_stagePrefix}_s{slotNumber}");
+            }
+
+            if (write)
+            {
+                _sbSlotWritten |= 1u << slot; // Read back by GetStorageBufferDescriptors to set BufferUsageFlags.Write.
+            }
+
+            return true;
+        }
+
+        private bool TryGetSbSlot(byte sbCbSlot, ushort sbCbOffset, out int slot)
+        {
+            int key = PackSbCbInfo(sbCbSlot, sbCbOffset);
+
+            if (!_sbSlots.TryGetValue(key, out slot))
+            {
+                slot = _sbSlots.Count; // First time this pair is seen: take the next sequential slot index.
+
+                if (slot >= _sbSlotToBindingMap.Length) // No room left in the slot-to-binding map.
+                {
+                    return false;
+                }
+
+                _sbSlots.Add(key, slot);
+                _sbSlotsReverse.Add(slot, key);
+            }
+
+            return true;
+        }
+
         public bool TryGetConstantBufferSlot(int binding, out int slot)
         {
             for (slot = 0; slot < _cbSlotToBindingMap.Length; slot++)
@@ -90,6 +146,34 @@ namespace Ryujinx.Graphics.Shader.Translation
             return descriptors;
         }
 
+        public BufferDescriptor[] GetStorageBufferDescriptors()
+        {
+            var descriptors = new BufferDescriptor[_sbSlots.Count]; // Upper bound; trimmed below if some slots are skipped.
+
+            int descriptorIndex = 0;
+
+            foreach ((int key, int slot) in _sbSlots)
+            {
+                int binding = _sbSlotToBindingMap[slot];
+
+                if (binding >= 0) // Skip slots that never received a binding.
+                {
+                    (int sbCbSlot, int sbCbOffset) = UnpackSbCbInfo(key);
+                    descriptors[descriptorIndex++] = new BufferDescriptor(binding, slot, sbCbSlot, sbCbOffset)
+                    {
+                        Flags = (_sbSlotWritten & (1u << slot)) != 0 ? BufferUsageFlags.Write : BufferUsageFlags.None
+                    };
+                }
+            }
+
+            if (descriptors.Length != descriptorIndex)
+            {
+                Array.Resize(ref descriptors, descriptorIndex); // Drop the unused tail entries.
+            }
+
+            return descriptors;
+        }
+
         private void AddNewConstantBuffer(int binding, string name)
         {
             StructureType type = new StructureType(new[]
@@ -100,6 +184,16 @@ namespace Ryujinx.Graphics.Shader.Translation
             _properties.AddConstantBuffer(binding, new BufferDefinition(BufferLayout.Std140, 0, binding, name, type));
         }
 
+        private void AddNewStorageBuffer(int binding, string name)
+        {
+            StructureType type = new StructureType(new[]
+            {
+                new StructureField(AggregateType.Array | AggregateType.U32, "data", 0) // Single U32 array field named "data".
+            });
+
+            _properties.AddStorageBuffer(binding, new BufferDefinition(BufferLayout.Std430, 1, binding, name, type)); // Registered under the same binding number it declares.
+        }
+
         public static string GetShaderStagePrefix(ShaderStage stage)
         {
             uint index = (uint)stage;
@@ -111,5 +205,15 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             return _stagePrefixes[index];
         }
+
+        private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset)
+        {
+            return sbCbOffset | ((int)sbCbSlot << 16); // Offset in the low bits, slot shifted above bit 15.
+        }
+
+        private static (int, int) UnpackSbCbInfo(int key)
+        {
+            return ((byte)(key >> 16), (ushort)key); // (slot, offset): slot from bits 16-23, offset from bits 0-15.
+        }
     }
 }
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
index 866ae5223b..baa88251ba 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
@@ -2,10 +2,8 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation;
 using Ryujinx.Graphics.Shader.StructuredIr;
 using System.Collections.Generic;
 using System.Linq;
-using System.Numerics;
 
 using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
-using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
 
 namespace Ryujinx.Graphics.Shader.Translation
 {
@@ -23,11 +21,10 @@ namespace Ryujinx.Graphics.Shader.Translation
             {
                 BasicBlock block = blocks[blkIndex];
 
-                for (LinkedListNode<INode> node = block.Operations.First; node != null;)
+                for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
                 {
                     if (node.Value is not Operation operation)
                     {
-                        node = node.Next;
                         continue;
                     }
 
@@ -56,8 +53,6 @@ namespace Ryujinx.Graphics.Shader.Translation
                         InsertVectorComponentSelect(node, config);
                     }
 
-                    LinkedListNode<INode> nextNode = node.Next;
-
                     if (operation is TextureOperation texOp)
                     {
                         node = InsertTexelFetchScale(hfm, node, config);
@@ -74,15 +69,7 @@ namespace Ryujinx.Graphics.Shader.Translation
                                 node = InsertSnormNormalization(node, config);
                             }
                         }
-
-                        nextNode = node.Next;
                     }
-                    else if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
-                    {
-                        nextNode = RewriteGlobalAccess(node, config)?.Next ?? nextNode;
-                    }
-
-                    node = nextNode;
                 }
             }
         }
@@ -184,196 +171,6 @@ namespace Ryujinx.Graphics.Shader.Translation
             operation.TurnIntoCopy(result);
         }
 
-        private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
-        {
-            Operation operation = (Operation)node.Value;
-
-            bool isAtomic = operation.Inst.IsAtomic();
-            bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
-            bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
-
-            Operation storageOp = null;
-
-            Operand PrependOperation(Instruction inst, params Operand[] sources)
-            {
-                Operand local = Local();
-
-                node.List.AddBefore(node, new Operation(inst, local, sources));
-
-                return local;
-            }
-
-            Operand PrependStorageOperation(Instruction inst, StorageKind storageKind, params Operand[] sources)
-            {
-                Operand local = Local();
-
-                node.List.AddBefore(node, new Operation(inst, storageKind, local, sources));
-
-                return local;
-            }
-
-            Operand PrependExistingOperation(Operation operation)
-            {
-                Operand local = Local();
-
-                operation.Dest = local;
-                node.List.AddBefore(node, operation);
-
-                return local;
-            }
-
-            Operand addrLow  = operation.GetSource(0);
-            Operand addrHigh = operation.GetSource(1);
-
-            Operand sbBaseAddrLow = Const(0);
-            Operand sbSlot        = Const(0);
-
-            Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
-
-            Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow)
-            {
-                baseAddrLow          = Cbuf(DriverReservedCb, cbOffset);
-                Operand baseAddrHigh = Cbuf(DriverReservedCb, cbOffset + 1);
-                Operand size         = Cbuf(DriverReservedCb, cbOffset + 2);
-
-                Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
-                Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);
-
-                Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size);
-
-                Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow);
-
-                Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh);
-
-                return PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh);
-            }
-
-            int sbUseMask = config.AccessibleStorageBuffersMask;
-
-            while (sbUseMask != 0)
-            {
-                int slot = BitOperations.TrailingZeroCount(sbUseMask);
-
-                sbUseMask &= ~(1 << slot);
-
-                int cbOffset = GetStorageCbOffset(config.Stage, slot);
-                slot = config.GetSbSlot(DriverReservedCb, (ushort)cbOffset);
-
-                config.SetUsedStorageBuffer(slot, isWrite);
-
-                Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
-
-                sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow);
-                sbSlot        = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
-            }
-
-            if (config.AccessibleStorageBuffersMask != 0)
-            {
-                Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
-                Operand byteOffset    = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
-
-                Operand[] sources = new Operand[operation.SourcesCount];
-
-                sources[0] = sbSlot;
-
-                if (isStg16Or8)
-                {
-                    sources[1] = byteOffset;
-                }
-                else
-                {
-                    sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
-                }
-
-                for (int index = 2; index < operation.SourcesCount; index++)
-                {
-                    sources[index] = operation.GetSource(index);
-                }
-
-                if (isAtomic)
-                {
-                    storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources);
-                }
-                else if (operation.Inst == Instruction.LoadGlobal)
-                {
-                    storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
-                }
-                else
-                {
-                    Instruction storeInst = operation.Inst switch
-                    {
-                        Instruction.StoreGlobal16 => Instruction.StoreStorage16,
-                        Instruction.StoreGlobal8 => Instruction.StoreStorage8,
-                        _ => Instruction.StoreStorage
-                    };
-
-                    storageOp = new Operation(storeInst, null, sources);
-                }
-            }
-            else if (operation.Dest != null)
-            {
-                storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0));
-            }
-
-            if (operation.Inst == Instruction.LoadGlobal)
-            {
-                int cbeUseMask = config.AccessibleConstantBuffersMask;
-
-                while (cbeUseMask != 0)
-                {
-                    int slot = BitOperations.TrailingZeroCount(cbeUseMask);
-                    int cbSlot = UbeFirstCbuf + slot;
-
-                    cbeUseMask &= ~(1 << slot);
-
-                    Operand previousResult = PrependExistingOperation(storageOp);
-
-                    int cbOffset = GetConstantUbeOffset(slot);
-
-                    Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
-
-                    Operand baseAddrTruncConst = PrependOperation(Instruction.BitwiseAnd, baseAddrLow, alignMask);
-                    Operand byteOffsetConst = PrependOperation(Instruction.Subtract, addrLow, baseAddrTruncConst);
-
-                    Operand cbIndex = PrependOperation(Instruction.ShiftRightU32, byteOffsetConst, Const(2));
-                    Operand vecIndex = PrependOperation(Instruction.ShiftRightU32, cbIndex, Const(2));
-                    Operand elemIndex = PrependOperation(Instruction.BitwiseAnd, cbIndex, Const(3));
-
-                    Operand[] sourcesCb = new Operand[4];
-
-                    sourcesCb[0] = Const(config.ResourceManager.GetConstantBufferBinding(cbSlot));
-                    sourcesCb[1] = Const(0);
-                    sourcesCb[2] = vecIndex;
-                    sourcesCb[3] = elemIndex;
-
-                    Operand ldcResult = PrependStorageOperation(Instruction.Load, StorageKind.ConstantBuffer, sourcesCb);
-
-                    storageOp = new Operation(Instruction.ConditionalSelect, operation.Dest, inRange, ldcResult, previousResult);
-                }
-            }
-
-            for (int index = 0; index < operation.SourcesCount; index++)
-            {
-                operation.SetSource(index, null);
-            }
-
-            LinkedListNode<INode> oldNode = node;
-            LinkedList<INode> oldNodeList = oldNode.List;
-
-            if (storageOp != null)
-            {
-                node = node.List.AddBefore(node, storageOp);
-            }
-            else
-            {
-                node = null;
-            }
-
-            oldNodeList.Remove(oldNode);
-
-            return node;
-        }
-
         private static LinkedListNode<INode> InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
         {
             TextureOperation texOp = (TextureOperation)node.Value;
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
index 40a32e2dc0..5c0a1fb606 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
@@ -110,12 +110,6 @@ namespace Ryujinx.Graphics.Shader.Translation
         public UInt128 NextInputAttributesComponents { get; private set; }
         public UInt128 ThisInputAttributesComponents { get; private set; }
 
-        public int AccessibleStorageBuffersMask { get; private set; }
-        public int AccessibleConstantBuffersMask { get; private set; }
-
-        private int _usedStorageBuffers;
-        private int _usedStorageBuffersWrite;
-
         private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format);
 
         private struct TextureMeta
@@ -127,18 +121,9 @@ namespace Ryujinx.Graphics.Shader.Translation
 
         private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures;
         private readonly Dictionary<TextureInfo, TextureMeta> _usedImages;
-
-        private readonly Dictionary<int, int> _sbSlots;
-        private readonly Dictionary<int, int> _sbSlotsReverse;
-
-        private BufferDescriptor[] _cachedStorageBufferDescriptors;
         private TextureDescriptor[] _cachedTextureDescriptors;
         private TextureDescriptor[] _cachedImageDescriptors;
 
-        private int _firstStorageBufferBinding;
-
-        public int FirstStorageBufferBinding => _firstStorageBufferBinding;
-
         public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options)
         {
             Stage       = stage;
@@ -147,18 +132,12 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             _transformFeedbackDefinitions = new Dictionary<TransformFeedbackVariable, TransformFeedbackOutput>();
 
-            AccessibleStorageBuffersMask  = (1 << GlobalMemory.StorageMaxCount) - 1;
-            AccessibleConstantBuffersMask = (1 << GlobalMemory.UbeMaxCount) - 1;
-
             UsedInputAttributesPerPatch  = new HashSet<int>();
             UsedOutputAttributesPerPatch = new HashSet<int>();
 
             _usedTextures = new Dictionary<TextureInfo, TextureMeta>();
             _usedImages   = new Dictionary<TextureInfo, TextureMeta>();
 
-            _sbSlots        = new Dictionary<int, int>();
-            _sbSlotsReverse = new Dictionary<int, int>();
-
             ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties());
         }
 
@@ -173,11 +152,6 @@ namespace Ryujinx.Graphics.Shader.Translation
             OutputTopology           = outputTopology;
             MaxOutputVertices        = maxOutputVertices;
             TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
-
-            if (Stage != ShaderStage.Compute)
-            {
-                AccessibleConstantBuffersMask = 0;
-            }
         }
 
         public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(header.Stage, gpuAccessor, options)
@@ -433,8 +407,6 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             UsedInputAttributes |= other.UsedInputAttributes;
             UsedOutputAttributes |= other.UsedOutputAttributes;
-            _usedStorageBuffers |= other._usedStorageBuffers;
-            _usedStorageBuffersWrite |= other._usedStorageBuffersWrite;
 
             foreach (var kv in other._usedTextures)
             {
@@ -634,23 +606,6 @@ namespace Ryujinx.Graphics.Shader.Translation
             UsedFeatures |= flags;
         }
 
-        public void SetAccessibleBufferMasks(int sbMask, int ubeMask)
-        {
-            AccessibleStorageBuffersMask = sbMask;
-            AccessibleConstantBuffersMask = ubeMask;
-        }
-
-        public void SetUsedStorageBuffer(int slot, bool write)
-        {
-            int mask = 1 << slot;
-            _usedStorageBuffers |= mask;
-
-            if (write)
-            {
-                _usedStorageBuffersWrite |= mask;
-            }
-        }
-
         public void SetUsedTexture(
             Instruction inst,
             SamplerType type,
@@ -756,76 +711,6 @@ namespace Ryujinx.Graphics.Shader.Translation
             return meta;
         }
 
-        public BufferDescriptor[] GetStorageBufferDescriptors()
-        {
-            if (_cachedStorageBufferDescriptors != null)
-            {
-                return _cachedStorageBufferDescriptors;
-            }
-
-            return _cachedStorageBufferDescriptors = GetStorageBufferDescriptors(
-                _usedStorageBuffers,
-                _usedStorageBuffersWrite,
-                true,
-                out _firstStorageBufferBinding,
-                GpuAccessor.QueryBindingStorageBuffer);
-        }
-
-        private BufferDescriptor[] GetStorageBufferDescriptors(
-            int usedMask,
-            int writtenMask,
-            bool isArray,
-            out int firstBinding,
-            Func<int, int> getBindingCallback)
-        {
-            firstBinding = 0;
-            bool hasFirstBinding = false;
-            var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)];
-
-            int lastSlot = -1;
-
-            for (int i = 0; i < descriptors.Length; i++)
-            {
-                int slot = BitOperations.TrailingZeroCount(usedMask);
-
-                if (isArray)
-                {
-                    // The next array entries also consumes bindings, even if they are unused.
-                    for (int j = lastSlot + 1; j < slot; j++)
-                    {
-                        int binding = getBindingCallback(j);
-
-                        if (!hasFirstBinding)
-                        {
-                            firstBinding = binding;
-                            hasFirstBinding = true;
-                        }
-                    }
-                }
-
-                lastSlot = slot;
-
-                (int sbCbSlot, int sbCbOffset) = GetSbCbInfo(slot);
-
-                descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot, sbCbSlot, sbCbOffset);
-
-                if (!hasFirstBinding)
-                {
-                    firstBinding = descriptors[i].Binding;
-                    hasFirstBinding = true;
-                }
-
-                if ((writtenMask & (1 << slot)) != 0)
-                {
-                    descriptors[i].SetFlag(BufferUsageFlags.Write);
-                }
-
-                usedMask &= ~(1 << slot);
-            }
-
-            return descriptors;
-        }
-
         public TextureDescriptor[] GetTextureDescriptors()
         {
             return _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture);
@@ -922,45 +807,11 @@ namespace Ryujinx.Graphics.Shader.Translation
             return FindDescriptorIndex(GetImageDescriptors(), texOp);
         }
 
-        public int GetSbSlot(byte sbCbSlot, ushort sbCbOffset)
-        {
-            int key = PackSbCbInfo(sbCbSlot, sbCbOffset);
-
-            if (!_sbSlots.TryGetValue(key, out int slot))
-            {
-                slot = _sbSlots.Count;
-                _sbSlots.Add(key, slot);
-                _sbSlotsReverse.Add(slot, key);
-            }
-
-            return slot;
-        }
-
-        public (int, int) GetSbCbInfo(int slot)
-        {
-            if (_sbSlotsReverse.TryGetValue(slot, out int key))
-            {
-                return UnpackSbCbInfo(key);
-            }
-
-            throw new ArgumentException($"Invalid slot {slot}.", nameof(slot));
-        }
-
-        private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset)
-        {
-            return sbCbOffset | ((int)sbCbSlot << 16);
-        }
-
-        private static (int, int) UnpackSbCbInfo(int key)
-        {
-            return ((byte)(key >> 16), (ushort)key);
-        }
-
         public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
         {
             return new ShaderProgramInfo(
                 ResourceManager.GetConstantBufferDescriptors(),
-                GetStorageBufferDescriptors(),
+                ResourceManager.GetStorageBufferDescriptors(),
                 GetTextureDescriptors(),
                 GetImageDescriptors(),
                 identification,
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
index 867e243795..6840043797 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
@@ -48,7 +48,7 @@ namespace Ryujinx.Graphics.Shader.Translation
                         continue;
                     }
 
-                    if (IsResourceWrite(operation.Inst))
+                    if (IsResourceWrite(operation.Inst, operation.StorageKind))
                     {
                         return false;
                     }
@@ -154,7 +154,7 @@ namespace Ryujinx.Graphics.Shader.Translation
             return totalVerticesCount + verticesCount == 3 && writesLayer;
         }
 
-        private static bool IsResourceWrite(Instruction inst)
+        private static bool IsResourceWrite(Instruction inst, StorageKind storageKind)
         {
             switch (inst)
             {
@@ -170,13 +170,11 @@ namespace Ryujinx.Graphics.Shader.Translation
                 case Instruction.AtomicXor:
                 case Instruction.ImageAtomic:
                 case Instruction.ImageStore:
-                case Instruction.StoreGlobal:
-                case Instruction.StoreGlobal16:
-                case Instruction.StoreGlobal8:
-                case Instruction.StoreStorage:
-                case Instruction.StoreStorage16:
-                case Instruction.StoreStorage8:
                     return true;
+                case Instruction.Store:
+                    return storageKind == StorageKind.StorageBuffer ||
+                           storageKind == StorageKind.SharedMemory ||
+                           storageKind == StorageKind.LocalMemory;
             }
 
             return false;
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs
index 5bbc00097d..c0212a5bc9 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs
@@ -78,7 +78,7 @@ namespace Ryujinx.Graphics.Shader.Translation
 
                     Ssa.Rename(cfg.Blocks);
 
-                    Optimizer.RunPass(cfg.Blocks, config);
+                    Optimizer.RunPass(hfm, cfg.Blocks, config);
                     Rewriter.RunPass(hfm, cfg.Blocks, config);
                 }
 
diff --git a/src/Ryujinx.Graphics.Vulkan/BufferManager.cs b/src/Ryujinx.Graphics.Vulkan/BufferManager.cs
index e046bf8992..521a132a79 100644
--- a/src/Ryujinx.Graphics.Vulkan/BufferManager.cs
+++ b/src/Ryujinx.Graphics.Vulkan/BufferManager.cs
@@ -115,8 +115,6 @@ namespace Ryujinx.Graphics.Vulkan
             holder = Create(gd, size, baseType: baseType, storageHint: storageHint);
             if (holder == null)
             {
-                Logger.Error?.Print(LogClass.Gpu, $"Failed to create buffer with size 0x{size:X} and type \"{baseType}\".");
-
                 return BufferHandle.Null;
             }
 
@@ -264,6 +262,8 @@ namespace Ryujinx.Graphics.Vulkan
                 return holder;
             }
 
+            Logger.Error?.Print(LogClass.Gpu, $"Failed to create buffer with size 0x{size:X} and type \"{baseType}\".");
+
             return null;
         }
 
diff --git a/src/Ryujinx.Graphics.Vulkan/DescriptorSetCollection.cs b/src/Ryujinx.Graphics.Vulkan/DescriptorSetCollection.cs
index c57cb1a95a..70b3ebfe47 100644
--- a/src/Ryujinx.Graphics.Vulkan/DescriptorSetCollection.cs
+++ b/src/Ryujinx.Graphics.Vulkan/DescriptorSetCollection.cs
@@ -70,30 +70,6 @@ namespace Ryujinx.Graphics.Vulkan
             }
         }
 
-        public unsafe void UpdateStorageBuffers(int setIndex, int baseBinding, ReadOnlySpan<DescriptorBufferInfo> bufferInfo)
-        {
-            if (bufferInfo.Length == 0)
-            {
-                return;
-            }
-
-            fixed (DescriptorBufferInfo* pBufferInfo = bufferInfo)
-            {
-                var writeDescriptorSet = new WriteDescriptorSet
-                {
-                    SType = StructureType.WriteDescriptorSet,
-                    DstSet = _descriptorSets[setIndex],
-                    DstBinding = (uint)(baseBinding & ~(Constants.MaxStorageBuffersPerStage - 1)),
-                    DstArrayElement = (uint)(baseBinding & (Constants.MaxStorageBuffersPerStage - 1)),
-                    DescriptorType = DescriptorType.StorageBuffer,
-                    DescriptorCount = (uint)bufferInfo.Length,
-                    PBufferInfo = pBufferInfo
-                };
-
-                _holder.Api.UpdateDescriptorSets(_holder.Device, 1, writeDescriptorSet, 0, null);
-            }
-        }
-
         public unsafe void UpdateImage(int setIndex, int bindingIndex, DescriptorImageInfo imageInfo, DescriptorType type)
         {
             if (imageInfo.ImageView.Handle != 0UL)
diff --git a/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs b/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs
index f3ac36e138..cbac1cd473 100644
--- a/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs
+++ b/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs
@@ -448,14 +448,7 @@ namespace Ryujinx.Graphics.Vulkan
                     }
 
                     ReadOnlySpan<DescriptorBufferInfo> storageBuffers = _storageBuffers;
-                    if (program.HasMinimalLayout)
-                    {
-                        dsc.UpdateBuffers(0, binding, storageBuffers.Slice(binding, count), DescriptorType.StorageBuffer);
-                    }
-                    else
-                    {
-                        dsc.UpdateStorageBuffers(0, binding, storageBuffers.Slice(binding, count));
-                    }
+                    dsc.UpdateBuffers(0, binding, storageBuffers.Slice(binding, count), DescriptorType.StorageBuffer);
                 }
                 else if (setIndex == PipelineBase.TextureSetIndex)
                 {