diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute.cs b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
index d24d2d8d72..9178cfb0d8 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Compute.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
@@ -17,29 +17,31 @@ namespace Ryujinx.Graphics.Gpu.Engine
         /// <param name="argument">Method call argument</param>
         public void Dispatch(GpuState state, int argument)
         {
-            uint dispatchParamsAddress = (uint)state.Get<int>(MethodOffset.DispatchParamsAddress);
+            uint qmdAddress = (uint)state.Get<int>(MethodOffset.DispatchParamsAddress);
 
-            var dispatchParams = _context.MemoryAccessor.Read<ComputeParams>((ulong)dispatchParamsAddress << 8);
+            var qmd = _context.MemoryAccessor.Read<ComputeQmd>((ulong)qmdAddress << 8);
 
             GpuVa shaderBaseAddress = state.Get<GpuVa>(MethodOffset.ShaderBaseAddress);
 
-            ulong shaderGpuVa = shaderBaseAddress.Pack() + (uint)dispatchParams.ShaderOffset;
+            ulong shaderGpuVa = shaderBaseAddress.Pack() + (uint)qmd.ProgramOffset;
 
-            // Note: A size of 0 is also invalid, the size must be at least 1.
-            int sharedMemorySize = Math.Clamp(dispatchParams.SharedMemorySize & 0xffff, 1, _context.Capabilities.MaximumComputeSharedMemorySize);
+            int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;
+
+            int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);
 
             ComputeShader cs = ShaderCache.GetComputeShader(
                 shaderGpuVa,
-                sharedMemorySize,
-                dispatchParams.UnpackBlockSizeX(),
-                dispatchParams.UnpackBlockSizeY(),
-                dispatchParams.UnpackBlockSizeZ());
+                qmd.CtaThreadDimension0,
+                qmd.CtaThreadDimension1,
+                qmd.CtaThreadDimension2,
+                localMemorySize,
+                sharedMemorySize);
 
             _context.Renderer.Pipeline.SetProgram(cs.HostProgram);
 
             var samplerPool = state.Get<PoolState>(MethodOffset.SamplerPoolState);
 
-            TextureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, dispatchParams.SamplerIndex);
+            TextureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, qmd.SamplerIndex);
 
             var texturePool = state.Get<PoolState>(MethodOffset.TexturePoolState);
 
@@ -50,17 +52,19 @@ namespace Ryujinx.Graphics.Gpu.Engine
             ShaderProgramInfo info = cs.Shader.Program.Info;
 
             uint sbEnableMask = 0;
-            uint ubEnableMask = dispatchParams.UnpackUniformBuffersEnableMask();
+            uint ubEnableMask = 0;
 
-            for (int index = 0; index < dispatchParams.UniformBuffers.Length; index++)
+            for (int index = 0; index < Constants.TotalCpUniformBuffers; index++)
             {
-                if ((ubEnableMask & (1 << index)) == 0)
+                if (!qmd.ConstantBufferValid(index))
                 {
                     continue;
                 }
 
-                ulong gpuVa = dispatchParams.UniformBuffers[index].PackAddress();
-                ulong size  = dispatchParams.UniformBuffers[index].UnpackSize();
+                ubEnableMask |= 1u << index;
+
+                ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32;
+                ulong size = (ulong)qmd.ConstantBufferSize(index);
 
                 BufferManager.SetComputeUniformBuffer(index, gpuVa, size);
             }
@@ -131,9 +135,9 @@ namespace Ryujinx.Graphics.Gpu.Engine
             TextureManager.CommitComputeBindings();
 
             _context.Renderer.Pipeline.DispatchCompute(
-                dispatchParams.UnpackGridSizeX(),
-                dispatchParams.UnpackGridSizeY(),
-                dispatchParams.UnpackGridSizeZ());
+                qmd.CtaRasterWidth,
+                qmd.CtaRasterHeight,
+                qmd.CtaRasterDepth);
 
             UpdateShaderState(state);
         }
diff --git a/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs b/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs
deleted file mode 100644
index c19b43d81e..0000000000
--- a/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs
+++ /dev/null
@@ -1,173 +0,0 @@
-using Ryujinx.Graphics.Gpu.State;
-using System;
-using System.Runtime.InteropServices;
-
-namespace Ryujinx.Graphics.Gpu.Engine
-{
-    /// <summary>
-    /// Compute uniform buffer parameters.
-    /// </summary>
-    struct UniformBufferParams
-    {
-        public int AddressLow;
-        public int AddressHighAndSize;
-
-        /// <summary>
-        /// Packs the split address to a 64-bits integer.
-        /// </summary>
-        /// <returns>Uniform buffer GPU virtual address</returns>
-        public ulong PackAddress()
-        {
-            return (uint)AddressLow | ((ulong)(AddressHighAndSize & 0xff) << 32);
-        }
-
-        /// <summary>
-        /// Unpacks the uniform buffer size in bytes.
-        /// </summary>
-        /// <returns>Uniform buffer size in bytes</returns>
-        public ulong UnpackSize()
-        {
-            return (ulong)((AddressHighAndSize >> 15) & 0x1ffff);
-        }
-    }
-
-    /// <summary>
-    /// Compute dispatch parameters.
-    /// </summary>
-    struct ComputeParams
-    {
-        public int Unknown0;
-        public int Unknown1;
-        public int Unknown2;
-        public int Unknown3;
-        public int Unknown4;
-        public int Unknown5;
-        public int Unknown6;
-        public int Unknown7;
-        public int ShaderOffset;
-        public int Unknown9;
-        public int Unknown10;
-        public SamplerIndex SamplerIndex;
-        public int GridSizeX;
-        public int GridSizeYZ;
-        public int Unknown14;
-        public int Unknown15;
-        public int Unknown16;
-        public int SharedMemorySize;
-        public int BlockSizeX;
-        public int BlockSizeYZ;
-        public int UniformBuffersConfig;
-        public int Unknown21;
-        public int Unknown22;
-        public int Unknown23;
-        public int Unknown24;
-        public int Unknown25;
-        public int Unknown26;
-        public int Unknown27;
-        public int Unknown28;
-
-        private UniformBufferParams _uniformBuffer0;
-        private UniformBufferParams _uniformBuffer1;
-        private UniformBufferParams _uniformBuffer2;
-        private UniformBufferParams _uniformBuffer3;
-        private UniformBufferParams _uniformBuffer4;
-        private UniformBufferParams _uniformBuffer5;
-        private UniformBufferParams _uniformBuffer6;
-        private UniformBufferParams _uniformBuffer7;
-
-        /// <summary>
-        /// Uniform buffer parameters.
-        /// </summary>
-        public Span<UniformBufferParams> UniformBuffers
-        {
-            get
-            {
-                return MemoryMarshal.CreateSpan(ref _uniformBuffer0, 8);
-            }
-        }
-
-        public int Unknown45;
-        public int Unknown46;
-        public int Unknown47;
-        public int Unknown48;
-        public int Unknown49;
-        public int Unknown50;
-        public int Unknown51;
-        public int Unknown52;
-        public int Unknown53;
-        public int Unknown54;
-        public int Unknown55;
-        public int Unknown56;
-        public int Unknown57;
-        public int Unknown58;
-        public int Unknown59;
-        public int Unknown60;
-        public int Unknown61;
-        public int Unknown62;
-        public int Unknown63;
-
-        /// <summary>
-        /// Unpacks the work group X size.
-        /// </summary>
-        /// <returns>Work group X size</returns>
-        public int UnpackGridSizeX()
-        {
-            return GridSizeX & 0x7fffffff;
-        }
-
-        /// <summary>
-        /// Unpacks the work group Y size.
-        /// </summary>
-        /// <returns>Work group Y size</returns>
-        public int UnpackGridSizeY()
-        {
-            return GridSizeYZ & 0xffff;
-        }
-
-        /// <summary>
-        /// Unpacks the work group Z size.
-        /// </summary>
-        /// <returns>Work group Z size</returns>
-        public int UnpackGridSizeZ()
-        {
-            return (GridSizeYZ >> 16) & 0xffff;
-        }
-
-        /// <summary>
-        /// Unpacks the local group X size.
-        /// </summary>
-        /// <returns>Local group X size</returns>
-        public int UnpackBlockSizeX()
-        {
-            return (BlockSizeX >> 16) & 0xffff;
-        }
-
-        /// <summary>
-        /// Unpacks the local group Y size.
-        /// </summary>
-        /// <returns>Local group Y size</returns>
-        public int UnpackBlockSizeY()
-        {
-            return BlockSizeYZ & 0xffff;
-        }
-
-        /// <summary>
-        /// Unpacks the local group Z size.
-        /// </summary>
-        /// <returns>Local group Z size</returns>
-        public int UnpackBlockSizeZ()
-        {
-            return (BlockSizeYZ >> 16) & 0xffff;
-        }
-
-        /// <summary>
-        /// Unpacks the uniform buffers enable mask.
-        /// Each bit set on the mask indicates that the respective buffer index is enabled.
-        /// </summary>
-        /// <returns>Uniform buffers enable mask</returns>
-        public uint UnpackUniformBuffersEnableMask()
-        {
-            return (uint)UniformBuffersConfig & 0xff;
-        }
-    }
-}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs b/Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs
new file mode 100644
index 0000000000..35418c2d80
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs
@@ -0,0 +1,275 @@
+using Ryujinx.Graphics.Gpu.State;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+    /// <summary>
+    /// Type of the dependent Queue Meta Data.
+    /// </summary>
+    enum DependentQmdType
+    {
+        Queue,
+        Grid
+    }
+
+    /// <summary>
+    /// Type of the release memory barrier.
+    /// </summary>
+    enum ReleaseMembarType
+    {
+        FeNone,
+        FeSysmembar
+    }
+
+    /// <summary>
+    /// Type of the CWD memory barrier.
+    /// </summary>
+    enum CwdMembarType
+    {
+        L1None,
+        L1Sysmembar,
+        L1Membar
+    }
+
+    /// <summary>
+    /// NaN behavior of 32-bit float operations in the shader.
+    /// </summary>
+    enum Fp32NanBehavior
+    {
+        Legacy,
+        Fp64Compatible
+    }
+
+    /// <summary>
+    /// NaN behavior of 32-bit float to integer conversions in the shader.
+    /// </summary>
+    enum Fp32F2iNanBehavior
+    {
+        PassZero,
+        PassIndefinite
+    }
+
+    /// <summary>
+    /// Limit of calls.
+    /// </summary>
+    enum ApiVisibleCallLimit
+    {
+        _32,
+        NoCheck
+    }
+
+    /// <summary>
+    /// Shared memory bank mapping mode.
+    /// </summary>
+    enum SharedMemoryBankMapping
+    {
+        FourBytesPerBank,
+        EightBytesPerBank
+    }
+
+    /// <summary>
+    /// Denormal behavior of 32-bit float narrowing instructions.
+    /// </summary>
+    enum Fp32NarrowInstruction
+    {
+        KeepDenorms,
+        FlushDenorms
+    }
+
+    /// <summary>
+    /// Configuration of the L1 cache.
+    /// </summary>
+    enum L1Configuration
+    {
+        DirectlyAddressableMemorySize16kb,
+        DirectlyAddressableMemorySize32kb,
+        DirectlyAddressableMemorySize48kb
+    }
+
+    /// <summary>
+    /// Reduction operation.
+    /// </summary>
+    enum ReductionOp
+    {
+        RedAdd,
+        RedMin,
+        RedMax,
+        RedInc,
+        RedDec,
+        RedAnd,
+        RedOr,
+        RedXor
+    }
+
+    /// <summary>
+    /// Reduction format.
+    /// </summary>
+    enum ReductionFormat
+    {
+        Unsigned32,
+        Signed32
+    }
+
+    /// <summary>
+    /// Size of a structure in words.
+    /// </summary>
+    enum StructureSize
+    {
+        FourWords,
+        OneWord
+    }
+
+    /// <summary>
+    /// Compute Queue Meta Data (QMD): 64 32-bit words decoded through the bit-field accessors below.
+    /// </summary>
+    unsafe struct ComputeQmd
+    {
+        private fixed int _words[64];
+
+        public int OuterPut => BitRange(30, 0);
+        public bool OuterOverflow => Bit(31);
+        public int OuterGet => BitRange(62, 32);
+        public bool OuterStickyOverflow => Bit(63);
+        public int InnerGet => BitRange(94, 64);
+        public bool InnerOverflow => Bit(95);
+        public int InnerPut => BitRange(126, 96);
+        public bool InnerStickyOverflow => Bit(127);
+        public int QmdReservedAA => BitRange(159, 128);
+        public int DependentQmdPointer => BitRange(191, 160);
+        public int QmdGroupId => BitRange(197, 192);
+        public bool SmGlobalCachingEnable => Bit(198);
+        public bool RunCtaInOneSmPartition => Bit(199);
+        public bool IsQueue => Bit(200);
+        public bool AddToHeadOfQmdGroupLinkedList => Bit(201);
+        public bool SemaphoreReleaseEnable0 => Bit(202);
+        public bool SemaphoreReleaseEnable1 => Bit(203);
+        public bool RequireSchedulingPcas => Bit(204);
+        public bool DependentQmdScheduleEnable => Bit(205);
+        public DependentQmdType DependentQmdType => (DependentQmdType)BitRange(206, 206);
+        public bool DependentQmdFieldCopy => Bit(207);
+        public int QmdReservedB => BitRange(223, 208);
+        public int CircularQueueSize => BitRange(248, 224);
+        public bool QmdReservedC => Bit(249);
+        public bool InvalidateTextureHeaderCache => Bit(250);
+        public bool InvalidateTextureSamplerCache => Bit(251);
+        public bool InvalidateTextureDataCache => Bit(252);
+        public bool InvalidateShaderDataCache => Bit(253);
+        public bool InvalidateInstructionCache => Bit(254);
+        public bool InvalidateShaderConstantCache => Bit(255);
+        public int ProgramOffset => BitRange(287, 256);
+        public int CircularQueueAddrLower => BitRange(319, 288);
+        public int CircularQueueAddrUpper => BitRange(327, 320);
+        public int QmdReservedD => BitRange(335, 328);
+        public int CircularQueueEntrySize => BitRange(351, 336);
+        public int CwdReferenceCountId => BitRange(357, 352);
+        public int CwdReferenceCountDeltaMinusOne => BitRange(365, 358);
+        public ReleaseMembarType ReleaseMembarType => (ReleaseMembarType)BitRange(366, 366);
+        public bool CwdReferenceCountIncrEnable => Bit(367);
+        public CwdMembarType CwdMembarType => (CwdMembarType)BitRange(369, 368);
+        public bool SequentiallyRunCtas => Bit(370);
+        public bool CwdReferenceCountDecrEnable => Bit(371);
+        public bool Throttled => Bit(372);
+        public Fp32NanBehavior Fp32NanBehavior => (Fp32NanBehavior)BitRange(376, 376);
+        public Fp32F2iNanBehavior Fp32F2iNanBehavior => (Fp32F2iNanBehavior)BitRange(377, 377);
+        public ApiVisibleCallLimit ApiVisibleCallLimit => (ApiVisibleCallLimit)BitRange(378, 378);
+        public SharedMemoryBankMapping SharedMemoryBankMapping => (SharedMemoryBankMapping)BitRange(379, 379);
+        public SamplerIndex SamplerIndex => (SamplerIndex)BitRange(382, 382);
+        public Fp32NarrowInstruction Fp32NarrowInstruction => (Fp32NarrowInstruction)BitRange(383, 383);
+        public int CtaRasterWidth => BitRange(415, 384);
+        public int CtaRasterHeight => BitRange(431, 416);
+        public int CtaRasterDepth => BitRange(447, 432);
+        public int CtaRasterWidthResume => BitRange(479, 448);
+        public int CtaRasterHeightResume => BitRange(495, 480);
+        public int CtaRasterDepthResume => BitRange(511, 496);
+        public int QueueEntriesPerCtaMinusOne => BitRange(518, 512);
+        public int CoalesceWaitingPeriod => BitRange(529, 522);
+        public int SharedMemorySize => BitRange(561, 544);
+        public int QmdReservedG => BitRange(575, 562);
+        public int QmdVersion => BitRange(579, 576);
+        public int QmdMajorVersion => BitRange(583, 580);
+        public int QmdReservedH => BitRange(591, 584);
+        public int CtaThreadDimension0 => BitRange(607, 592);
+        public int CtaThreadDimension1 => BitRange(623, 608);
+        public int CtaThreadDimension2 => BitRange(639, 624);
+        public bool ConstantBufferValid(int i) => Bit(640 + i * 1);
+        public int QmdReservedI => BitRange(668, 648);
+        public L1Configuration L1Configuration => (L1Configuration)BitRange(671, 669);
+        public int SmDisableMaskLower => BitRange(703, 672);
+        public int SmDisableMaskUpper => BitRange(735, 704);
+        public int Release0AddressLower => BitRange(767, 736);
+        public int Release0AddressUpper => BitRange(775, 768);
+        public int QmdReservedJ => BitRange(783, 776);
+        public ReductionOp Release0ReductionOp => (ReductionOp)BitRange(790, 788);
+        public bool QmdReservedK => Bit(791);
+        public ReductionFormat Release0ReductionFormat => (ReductionFormat)BitRange(793, 792);
+        public bool Release0ReductionEnable => Bit(794);
+        public StructureSize Release0StructureSize => (StructureSize)BitRange(799, 799);
+        public int Release0Payload => BitRange(831, 800);
+        public int Release1AddressLower => BitRange(863, 832);
+        public int Release1AddressUpper => BitRange(871, 864);
+        public int QmdReservedL => BitRange(879, 872);
+        public ReductionOp Release1ReductionOp => (ReductionOp)BitRange(886, 884);
+        public bool QmdReservedM => Bit(887);
+        public ReductionFormat Release1ReductionFormat => (ReductionFormat)BitRange(889, 888);
+        public bool Release1ReductionEnable => Bit(890);
+        public StructureSize Release1StructureSize => (StructureSize)BitRange(895, 895);
+        public int Release1Payload => BitRange(927, 896);
+        public int ConstantBufferAddrLower(int i) => BitRange(959 + i * 64, 928 + i * 64);
+        public int ConstantBufferAddrUpper(int i) => BitRange(967 + i * 64, 960 + i * 64);
+        public int ConstantBufferReservedAddr(int i) => BitRange(973 + i * 64, 968 + i * 64);
+        public bool ConstantBufferInvalidate(int i) => Bit(974 + i * 64);
+        public int ConstantBufferSize(int i) => BitRange(991 + i * 64, 975 + i * 64);
+        public int ShaderLocalMemoryLowSize => BitRange(1463, 1440);
+        public int QmdReservedN => BitRange(1466, 1464);
+        public int BarrierCount => BitRange(1471, 1467);
+        public int ShaderLocalMemoryHighSize => BitRange(1495, 1472);
+        public int RegisterCount => BitRange(1503, 1496);
+        public int ShaderLocalMemoryCrsSize => BitRange(1527, 1504);
+        public int SassVersion => BitRange(1535, 1528);
+        public int HwOnlyInnerGet => BitRange(1566, 1536);
+        public bool HwOnlyRequireSchedulingPcas => Bit(1567);
+        public int HwOnlyInnerPut => BitRange(1598, 1568);
+        public bool HwOnlyScgType => Bit(1599);
+        public int HwOnlySpanListHeadIndex => BitRange(1629, 1600);
+        public bool QmdReservedQ => Bit(1630);
+        public bool HwOnlySpanListHeadIndexValid => Bit(1631);
+        public int HwOnlySkedNextQmdPointer => BitRange(1663, 1632);
+        public int QmdSpareE => BitRange(1695, 1664);
+        public int QmdSpareF => BitRange(1727, 1696);
+        public int QmdSpareG => BitRange(1759, 1728);
+        public int QmdSpareH => BitRange(1791, 1760);
+        public int QmdSpareI => BitRange(1823, 1792);
+        public int QmdSpareJ => BitRange(1855, 1824);
+        public int QmdSpareK => BitRange(1887, 1856);
+        public int QmdSpareL => BitRange(1919, 1888);
+        public int QmdSpareM => BitRange(1951, 1920);
+        public int QmdSpareN => BitRange(1983, 1952);
+        public int DebugIdUpper => BitRange(2015, 1984);
+        public int DebugIdLower => BitRange(2047, 2016);
+
+        /// <summary>Tests a single bit of the QMD; bit 0 is the least significant bit of the first word.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private bool Bit(int bit)
+        {
+            if ((uint)bit >= 64 * 32)
+            {
+                throw new ArgumentOutOfRangeException(nameof(bit));
+            }
+            return ((_words[bit >> 5] >> (bit & 31)) & 1) != 0;
+        }
+
+        /// <summary>Reads the inclusive bit range [lower, upper], which must lie inside one 32-bit word.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private int BitRange(int upper, int lower)
+        {
+            // Only one word is read and shift counts wrap at 32, so an inverted or word-straddling range would decode garbage.
+            if ((uint)lower >= 64 * 32 || upper < lower || (upper >> 5) != (lower >> 5))
+            {
+                throw new ArgumentOutOfRangeException((uint)lower >= 64 * 32 ? nameof(lower) : nameof(upper));
+            }
+
+            return (_words[lower >> 5] >> (lower & 31)) & (int)(uint.MaxValue >> (31 - (upper - lower)));
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj b/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
index b9751508ef..a55c4d1ceb 100644
--- a/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
+++ b/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
@@ -13,4 +13,9 @@
     <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
   </PropertyGroup>
 
+  <PropertyGroup>
+    <!-- ComputeQmd uses a fixed-size buffer, so unsafe code must be allowed for every configuration and platform. -->
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+
 </Project>
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index dad1b0ac2e..8aa9b1c7b5 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -51,12 +51,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// This automatically translates, compiles and adds the code to the cache if not present.
         /// </remarks>
         /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
-        /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
         /// <param name="localSizeX">Local group size X of the computer shader</param>
         /// <param name="localSizeY">Local group size Y of the computer shader</param>
         /// <param name="localSizeZ">Local group size Z of the computer shader</param>
+        /// <param name="localMemorySize">Local memory size of the compute shader</param>
+        /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
         /// <returns>Compiled compute shader code</returns>
-        public ComputeShader GetComputeShader(ulong gpuVa, int sharedMemorySize, int localSizeX, int localSizeY, int localSizeZ)
+        public ComputeShader GetComputeShader(
+            ulong gpuVa,
+            int localSizeX,
+            int localSizeY,
+            int localSizeZ,
+            int localMemorySize,
+            int sharedMemorySize)
         {
             bool isCached = _cpPrograms.TryGetValue(gpuVa, out List<ComputeShader> list);
 
@@ -71,7 +78,13 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 }
             }
 
-            CachedShader shader = TranslateComputeShader(gpuVa, sharedMemorySize, localSizeX, localSizeY, localSizeZ);
+            CachedShader shader = TranslateComputeShader(
+                gpuVa,
+                localSizeX,
+                localSizeY,
+                localSizeZ,
+                localMemorySize,
+                sharedMemorySize);
 
             shader.HostShader = _context.Renderer.CompileShader(shader.Program);
 
@@ -237,12 +250,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// Translates the binary Maxwell shader code to something that the host API accepts.
         /// </summary>
         /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
-        /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
         /// <param name="localSizeX">Local group size X of the computer shader</param>
         /// <param name="localSizeY">Local group size Y of the computer shader</param>
         /// <param name="localSizeZ">Local group size Z of the computer shader</param>
+        /// <param name="localMemorySize">Local memory size of the compute shader</param>
+        /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
         /// <returns>Compiled compute shader code</returns>
-        private CachedShader TranslateComputeShader(ulong gpuVa, int sharedMemorySize, int localSizeX, int localSizeY, int localSizeZ)
+        private CachedShader TranslateComputeShader(
+            ulong gpuVa,
+            int localSizeX,
+            int localSizeY,
+            int localSizeZ,
+            int localMemorySize,
+            int sharedMemorySize)
         {
             if (gpuVa == 0)
             {
@@ -256,6 +276,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
                     QueryInfoName.ComputeLocalSizeX => localSizeX,
                     QueryInfoName.ComputeLocalSizeY => localSizeY,
                     QueryInfoName.ComputeLocalSizeZ => localSizeZ,
+                    QueryInfoName.ComputeLocalMemorySize => localMemorySize,
                     QueryInfoName.ComputeSharedMemorySize => sharedMemorySize,
                     _ => QueryInfoCommon(info)
                 };
diff --git a/Ryujinx.Graphics.OpenGL/Program.cs b/Ryujinx.Graphics.OpenGL/Program.cs
index a8ee7ae895..fe14e9a9db 100644
--- a/Ryujinx.Graphics.OpenGL/Program.cs
+++ b/Ryujinx.Graphics.OpenGL/Program.cs
@@ -77,14 +77,7 @@ namespace Ryujinx.Graphics.OpenGL
 
             Bind();
 
-            int extraBlockindex = GL.GetUniformBlockIndex(Handle, "Extra");
-
-            if (extraBlockindex >= 0)
-            {
-                GL.UniformBlockBinding(Handle, extraBlockindex, 0);
-            }
-
-            int ubBindingPoint = 1;
+            int ubBindingPoint = 0;
             int sbBindingPoint = 0;
             int textureUnit    = 0;
             int imageUnit      = 0;
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
index 200569c48e..2e7f9f1b07 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
@@ -47,25 +47,35 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                 context.AppendLine();
             }
 
-            context.AppendLine("layout (std140) uniform Extra");
-
-            context.EnterScope();
-
-            context.AppendLine("vec2 flip;");
-            context.AppendLine("int instance;");
-
-            context.LeaveScope(";");
-
-            context.AppendLine();
-
-            context.AppendLine($"uint {DefaultNames.LocalMemoryName}[0x100];");
-            context.AppendLine();
-
             if (context.Config.Stage == ShaderStage.Compute)
             {
-                string size = NumberFormatter.FormatInt(BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeSharedMemorySize), 4));
+                int localMemorySize = BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeLocalMemorySize), 4);
 
-                context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{size}];");
+                if (localMemorySize != 0)
+                {
+                    string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
+
+                    context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
+                    context.AppendLine();
+                }
+
+                int sharedMemorySize = BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeSharedMemorySize), 4);
+
+                if (sharedMemorySize != 0)
+                {
+                    string sharedMemorySizeStr = NumberFormatter.FormatInt(sharedMemorySize);
+
+                    context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{sharedMemorySizeStr}];");
+                    context.AppendLine();
+                }
+            }
+            else if (context.Config.LocalMemorySize != 0)
+            {
+                int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4);
+
+                string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
+
+                context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
                 context.AppendLine();
             }
 
diff --git a/Ryujinx.Graphics.Shader/QueryInfoName.cs b/Ryujinx.Graphics.Shader/QueryInfoName.cs
index c4f2cb6cc2..887c0d7d14 100644
--- a/Ryujinx.Graphics.Shader/QueryInfoName.cs
+++ b/Ryujinx.Graphics.Shader/QueryInfoName.cs
@@ -5,6 +5,7 @@ namespace Ryujinx.Graphics.Shader
         ComputeLocalSizeX,
         ComputeLocalSizeY,
         ComputeLocalSizeZ,
+        ComputeLocalMemorySize,
         ComputeSharedMemorySize,
         IsTextureBuffer,
         IsTextureRectangle,
diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
index 8a0f25fe45..e3708b41d6 100644
--- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
+++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
@@ -10,6 +10,8 @@ namespace Ryujinx.Graphics.Shader.Translation
 
         public int MaxOutputVertices { get; }
 
+        public int LocalMemorySize { get; }
+
         public OutputMapTarget[] OmapTargets    { get; }
         public bool              OmapSampleMask { get; }
         public bool              OmapDepth      { get; }
@@ -23,6 +25,7 @@ namespace Ryujinx.Graphics.Shader.Translation
             Stage             = ShaderStage.Compute;
             OutputTopology    = OutputTopology.PointList;
             MaxOutputVertices = 0;
+            LocalMemorySize   = 0;
             OmapTargets       = null;
             OmapSampleMask    = false;
             OmapDepth         = false;
@@ -35,6 +38,7 @@ namespace Ryujinx.Graphics.Shader.Translation
             Stage             = header.Stage;
             OutputTopology    = header.OutputTopology;
             MaxOutputVertices = header.MaxOutputVertexCount;
+            LocalMemorySize   = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;
             OmapTargets       = header.OmapTargets;
             OmapSampleMask    = header.OmapSampleMask;
             OmapDepth         = header.OmapDepth;
@@ -80,6 +84,8 @@ namespace Ryujinx.Graphics.Shader.Translation
                     case QueryInfoName.ComputeLocalSizeY:
                     case QueryInfoName.ComputeLocalSizeZ:
                         return 1;
+                    case QueryInfoName.ComputeLocalMemorySize:
+                        return 0x1000;
                     case QueryInfoName.ComputeSharedMemorySize:
                         return 0xc000;
                     case QueryInfoName.IsTextureBuffer: