diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute.cs b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
index d24d2d8d72..9178cfb0d8 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Compute.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
@@ -17,29 +17,31 @@ namespace Ryujinx.Graphics.Gpu.Engine
/// Method call argument
public void Dispatch(GpuState state, int argument)
{
- uint dispatchParamsAddress = (uint)state.Get(MethodOffset.DispatchParamsAddress);
+ uint qmdAddress = (uint)state.Get(MethodOffset.DispatchParamsAddress);
- var dispatchParams = _context.MemoryAccessor.Read((ulong)dispatchParamsAddress << 8);
+ var qmd = _context.MemoryAccessor.Read((ulong)qmdAddress << 8);
GpuVa shaderBaseAddress = state.Get(MethodOffset.ShaderBaseAddress);
- ulong shaderGpuVa = shaderBaseAddress.Pack() + (uint)dispatchParams.ShaderOffset;
+ ulong shaderGpuVa = shaderBaseAddress.Pack() + (uint)qmd.ProgramOffset;
- // Note: A size of 0 is also invalid, the size must be at least 1.
- int sharedMemorySize = Math.Clamp(dispatchParams.SharedMemorySize & 0xffff, 1, _context.Capabilities.MaximumComputeSharedMemorySize);
+ int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;
+
+ int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);
ComputeShader cs = ShaderCache.GetComputeShader(
shaderGpuVa,
- sharedMemorySize,
- dispatchParams.UnpackBlockSizeX(),
- dispatchParams.UnpackBlockSizeY(),
- dispatchParams.UnpackBlockSizeZ());
+ qmd.CtaThreadDimension0,
+ qmd.CtaThreadDimension1,
+ qmd.CtaThreadDimension2,
+ localMemorySize,
+ sharedMemorySize);
_context.Renderer.Pipeline.SetProgram(cs.HostProgram);
var samplerPool = state.Get(MethodOffset.SamplerPoolState);
- TextureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, dispatchParams.SamplerIndex);
+ TextureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, qmd.SamplerIndex);
var texturePool = state.Get(MethodOffset.TexturePoolState);
@@ -50,17 +52,19 @@ namespace Ryujinx.Graphics.Gpu.Engine
ShaderProgramInfo info = cs.Shader.Program.Info;
uint sbEnableMask = 0;
- uint ubEnableMask = dispatchParams.UnpackUniformBuffersEnableMask();
+ uint ubEnableMask = 0;
- for (int index = 0; index < dispatchParams.UniformBuffers.Length; index++)
+ for (int index = 0; index < Constants.TotalCpUniformBuffers; index++)
{
- if ((ubEnableMask & (1 << index)) == 0)
+ if (!qmd.ConstantBufferValid(index))
{
continue;
}
- ulong gpuVa = dispatchParams.UniformBuffers[index].PackAddress();
- ulong size = dispatchParams.UniformBuffers[index].UnpackSize();
+ ubEnableMask |= 1u << index;
+
+ ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32;
+ ulong size = (ulong)qmd.ConstantBufferSize(index);
BufferManager.SetComputeUniformBuffer(index, gpuVa, size);
}
@@ -131,9 +135,9 @@ namespace Ryujinx.Graphics.Gpu.Engine
TextureManager.CommitComputeBindings();
_context.Renderer.Pipeline.DispatchCompute(
- dispatchParams.UnpackGridSizeX(),
- dispatchParams.UnpackGridSizeY(),
- dispatchParams.UnpackGridSizeZ());
+ qmd.CtaRasterWidth,
+ qmd.CtaRasterHeight,
+ qmd.CtaRasterDepth);
UpdateShaderState(state);
}
diff --git a/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs b/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs
deleted file mode 100644
index c19b43d81e..0000000000
--- a/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs
+++ /dev/null
@@ -1,173 +0,0 @@
-using Ryujinx.Graphics.Gpu.State;
-using System;
-using System.Runtime.InteropServices;
-
-namespace Ryujinx.Graphics.Gpu.Engine
-{
- ///
- /// Compute uniform buffer parameters.
- ///
- struct UniformBufferParams
- {
- public int AddressLow;
- public int AddressHighAndSize;
-
- ///
- /// Packs the split address to a 64-bits integer.
- ///
- /// Uniform buffer GPU virtual address
- public ulong PackAddress()
- {
- return (uint)AddressLow | ((ulong)(AddressHighAndSize & 0xff) << 32);
- }
-
- ///
- /// Unpacks the uniform buffer size in bytes.
- ///
- /// Uniform buffer size in bytes
- public ulong UnpackSize()
- {
- return (ulong)((AddressHighAndSize >> 15) & 0x1ffff);
- }
- }
-
- ///
- /// Compute dispatch parameters.
- ///
- struct ComputeParams
- {
- public int Unknown0;
- public int Unknown1;
- public int Unknown2;
- public int Unknown3;
- public int Unknown4;
- public int Unknown5;
- public int Unknown6;
- public int Unknown7;
- public int ShaderOffset;
- public int Unknown9;
- public int Unknown10;
- public SamplerIndex SamplerIndex;
- public int GridSizeX;
- public int GridSizeYZ;
- public int Unknown14;
- public int Unknown15;
- public int Unknown16;
- public int SharedMemorySize;
- public int BlockSizeX;
- public int BlockSizeYZ;
- public int UniformBuffersConfig;
- public int Unknown21;
- public int Unknown22;
- public int Unknown23;
- public int Unknown24;
- public int Unknown25;
- public int Unknown26;
- public int Unknown27;
- public int Unknown28;
-
- private UniformBufferParams _uniformBuffer0;
- private UniformBufferParams _uniformBuffer1;
- private UniformBufferParams _uniformBuffer2;
- private UniformBufferParams _uniformBuffer3;
- private UniformBufferParams _uniformBuffer4;
- private UniformBufferParams _uniformBuffer5;
- private UniformBufferParams _uniformBuffer6;
- private UniformBufferParams _uniformBuffer7;
-
- ///
- /// Uniform buffer parameters.
- ///
- public Span UniformBuffers
- {
- get
- {
- return MemoryMarshal.CreateSpan(ref _uniformBuffer0, 8);
- }
- }
-
- public int Unknown45;
- public int Unknown46;
- public int Unknown47;
- public int Unknown48;
- public int Unknown49;
- public int Unknown50;
- public int Unknown51;
- public int Unknown52;
- public int Unknown53;
- public int Unknown54;
- public int Unknown55;
- public int Unknown56;
- public int Unknown57;
- public int Unknown58;
- public int Unknown59;
- public int Unknown60;
- public int Unknown61;
- public int Unknown62;
- public int Unknown63;
-
- ///
- /// Unpacks the work group X size.
- ///
- /// Work group X size
- public int UnpackGridSizeX()
- {
- return GridSizeX & 0x7fffffff;
- }
-
- ///
- /// Unpacks the work group Y size.
- ///
- /// Work group Y size
- public int UnpackGridSizeY()
- {
- return GridSizeYZ & 0xffff;
- }
-
- ///
- /// Unpacks the work group Z size.
- ///
- /// Work group Z size
- public int UnpackGridSizeZ()
- {
- return (GridSizeYZ >> 16) & 0xffff;
- }
-
- ///
- /// Unpacks the local group X size.
- ///
- /// Local group X size
- public int UnpackBlockSizeX()
- {
- return (BlockSizeX >> 16) & 0xffff;
- }
-
- ///
- /// Unpacks the local group Y size.
- ///
- /// Local group Y size
- public int UnpackBlockSizeY()
- {
- return BlockSizeYZ & 0xffff;
- }
-
- ///
- /// Unpacks the local group Z size.
- ///
- /// Local group Z size
- public int UnpackBlockSizeZ()
- {
- return (BlockSizeYZ >> 16) & 0xffff;
- }
-
- ///
- /// Unpacks the uniform buffers enable mask.
- /// Each bit set on the mask indicates that the respective buffer index is enabled.
- ///
- /// Uniform buffers enable mask
- public uint UnpackUniformBuffersEnableMask()
- {
- return (uint)UniformBuffersConfig & 0xff;
- }
- }
-}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs b/Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs
new file mode 100644
index 0000000000..35418c2d80
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs
@@ -0,0 +1,275 @@
+using Ryujinx.Graphics.Gpu.State;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+ /// <summary>
+ /// Type of the dependent Queue Meta Data.
+ /// </summary>
+ /// <remarks>
+ /// Members are implicitly numbered from zero in declaration order.
+ /// <see cref="ComputeQmd.DependentQmdType"/> casts a raw 1-bit field (bit 206) to this type,
+ /// so the order of the members must not be changed.
+ /// </remarks>
+ enum DependentQmdType
+ {
+ Queue,
+ Grid
+ }
+
+ /// <summary>
+ /// Type of the release memory barrier.
+ /// </summary>
+ /// <remarks>
+ /// Members are implicitly numbered from zero in declaration order.
+ /// <see cref="ComputeQmd.ReleaseMembarType"/> casts a raw 1-bit field (bit 366) to this type,
+ /// so the order of the members must not be changed.
+ /// </remarks>
+ enum ReleaseMembarType
+ {
+ FeNone,
+ FeSysmembar
+ }
+
+ /// <summary>
+ /// Type of the CWD memory barrier.
+ /// </summary>
+ /// <remarks>
+ /// <see cref="ComputeQmd.CwdMembarType"/> casts a raw 2-bit field (bits 368 to 369) to this type,
+ /// so each member must carry the hardware encoding of that field.
+ /// The values are written explicitly because the encoding is not contiguous:
+ /// L1Membar is encoded as 3, and the raw value 2 has no named member.
+ /// </remarks>
+ enum CwdMembarType
+ {
+ L1None = 0,
+ L1Sysmembar = 1,
+ L1Membar = 3
+ }
+
+ /// <summary>
+ /// NaN behavior of 32-bits float operations on the shader.
+ /// </summary>
+ /// <remarks>
+ /// Members are implicitly numbered from zero in declaration order.
+ /// <see cref="ComputeQmd.Fp32NanBehavior"/> casts a raw 1-bit field (bit 376) to this type,
+ /// so the order of the members must not be changed.
+ /// </remarks>
+ enum Fp32NanBehavior
+ {
+ Legacy,
+ Fp64Compatible
+ }
+
+ /// <summary>
+ /// NaN behavior of 32-bits float to integer conversion on the shader.
+ /// </summary>
+ /// <remarks>
+ /// Members are implicitly numbered from zero in declaration order.
+ /// <see cref="ComputeQmd.Fp32F2iNanBehavior"/> casts a raw 1-bit field (bit 377) to this type,
+ /// so the order of the members must not be changed.
+ /// </remarks>
+ enum Fp32F2iNanBehavior
+ {
+ PassZero,
+ PassIndefinite
+ }
+
+ /// <summary>
+ /// Limit of calls.
+ /// </summary>
+ /// <remarks>
+ /// Members are implicitly numbered from zero in declaration order.
+ /// <see cref="ComputeQmd.ApiVisibleCallLimit"/> casts a raw 1-bit field (bit 378) to this type,
+ /// so the order of the members must not be changed.
+ /// NOTE(review): "_32" presumably denotes a limit of 32 calls; the leading underscore is
+ /// only there because an identifier cannot start with a digit. Confirm against the hardware headers.
+ /// </remarks>
+ enum ApiVisibleCallLimit
+ {
+ _32,
+ NoCheck
+ }
+
+ /// <summary>
+ /// Shared memory bank mapping mode.
+ /// </summary>
+ /// <remarks>
+ /// Members are implicitly numbered from zero in declaration order.
+ /// <see cref="ComputeQmd.SharedMemoryBankMapping"/> casts a raw 1-bit field (bit 379) to this type,
+ /// so the order of the members must not be changed.
+ /// </remarks>
+ enum SharedMemoryBankMapping
+ {
+ FourBytesPerBank,
+ EightBytesPerBank
+ }
+
+ /// <summary>
+ /// Denormal behavior of 32-bits float narrowing instructions.
+ /// </summary>
+ /// <remarks>
+ /// Members are implicitly numbered from zero in declaration order.
+ /// <see cref="ComputeQmd.Fp32NarrowInstruction"/> casts a raw 1-bit field (bit 383) to this type,
+ /// so the order of the members must not be changed.
+ /// </remarks>
+ enum Fp32NarrowInstruction
+ {
+ KeepDenorms,
+ FlushDenorms
+ }
+
+ /// <summary>
+ /// Configuration of the L1 cache.
+ /// </summary>
+ /// <remarks>
+ /// <see cref="ComputeQmd.L1Configuration"/> casts a raw 3-bit field (bits 669 to 671) to this type,
+ /// so each member must carry the hardware encoding of that field.
+ /// The values are written explicitly because the encoding starts at 1, not 0:
+ /// the raw value 0 (and 4 to 7) has no named member.
+ /// </remarks>
+ enum L1Configuration
+ {
+ DirectlyAddressableMemorySize16kb = 1,
+ DirectlyAddressableMemorySize32kb = 2,
+ DirectlyAddressableMemorySize48kb = 3
+ }
+
+ /// <summary>
+ /// Reduction operation.
+ /// </summary>
+ /// <remarks>
+ /// Members are implicitly numbered from zero in declaration order.
+ /// <see cref="ComputeQmd.Release0ReductionOp"/> and <see cref="ComputeQmd.Release1ReductionOp"/>
+ /// cast a raw 3-bit field to this type, so the eight members cover every possible raw value
+ /// and their order must not be changed.
+ /// </remarks>
+ enum ReductionOp
+ {
+ RedAdd,
+ RedMin,
+ RedMax,
+ RedInc,
+ RedDec,
+ RedAnd,
+ RedOr,
+ RedXor
+ }
+
+ /// <summary>
+ /// Reduction format.
+ /// </summary>
+ /// <remarks>
+ /// Members are implicitly numbered from zero in declaration order.
+ /// <see cref="ComputeQmd.Release0ReductionFormat"/> and <see cref="ComputeQmd.Release1ReductionFormat"/>
+ /// cast a raw 2-bit field to this type, so the order of the members must not be changed.
+ /// The raw values 2 and 3 have no named member.
+ /// </remarks>
+ enum ReductionFormat
+ {
+ Unsigned32,
+ Signed32
+ }
+
+ /// <summary>
+ /// Size of a structure in words.
+ /// </summary>
+ /// <remarks>
+ /// Members are implicitly numbered from zero in declaration order.
+ /// <see cref="ComputeQmd.Release0StructureSize"/> and <see cref="ComputeQmd.Release1StructureSize"/>
+ /// cast a raw 1-bit field to this type, so the order of the members must not be changed
+ /// (FourWords is 0, OneWord is 1).
+ /// </remarks>
+ enum StructureSize
+ {
+ FourWords,
+ OneWord
+ }
+
+ /// <summary>
+ /// Compute Queue Meta Data.
+ /// </summary>
+ /// <remarks>
+ /// The structure is a block of 64 32-bit words (2048 bits). Every member decodes one bit,
+ /// or one inclusive bit range, of that block. Bit 0 is the least significant bit of the
+ /// first word. Fields that are exactly 32 bits wide are returned unmodified, so they can
+ /// be negative when the most significant bit of the field is set.
+ /// </remarks>
+ unsafe struct ComputeQmd
+ {
+ private fixed int _words[64];
+
+ public int OuterPut => BitRange(30, 0);
+ public bool OuterOverflow => Bit(31);
+ public int OuterGet => BitRange(62, 32);
+ public bool OuterStickyOverflow => Bit(63);
+ public int InnerGet => BitRange(94, 64);
+ public bool InnerOverflow => Bit(95);
+ public int InnerPut => BitRange(126, 96);
+ public bool InnerStickyOverflow => Bit(127);
+ public int QmdReservedAA => BitRange(159, 128);
+ public int DependentQmdPointer => BitRange(191, 160);
+ public int QmdGroupId => BitRange(197, 192);
+ public bool SmGlobalCachingEnable => Bit(198);
+ public bool RunCtaInOneSmPartition => Bit(199);
+ public bool IsQueue => Bit(200);
+ public bool AddToHeadOfQmdGroupLinkedList => Bit(201);
+ public bool SemaphoreReleaseEnable0 => Bit(202);
+ public bool SemaphoreReleaseEnable1 => Bit(203);
+ public bool RequireSchedulingPcas => Bit(204);
+ public bool DependentQmdScheduleEnable => Bit(205);
+ public DependentQmdType DependentQmdType => (DependentQmdType)BitRange(206, 206);
+ public bool DependentQmdFieldCopy => Bit(207);
+ public int QmdReservedB => BitRange(223, 208);
+ public int CircularQueueSize => BitRange(248, 224);
+ public bool QmdReservedC => Bit(249);
+ public bool InvalidateTextureHeaderCache => Bit(250);
+ public bool InvalidateTextureSamplerCache => Bit(251);
+ public bool InvalidateTextureDataCache => Bit(252);
+ public bool InvalidateShaderDataCache => Bit(253);
+ public bool InvalidateInstructionCache => Bit(254);
+ public bool InvalidateShaderConstantCache => Bit(255);
+ public int ProgramOffset => BitRange(287, 256);
+ public int CircularQueueAddrLower => BitRange(319, 288);
+ public int CircularQueueAddrUpper => BitRange(327, 320);
+ public int QmdReservedD => BitRange(335, 328);
+ public int CircularQueueEntrySize => BitRange(351, 336);
+ public int CwdReferenceCountId => BitRange(357, 352);
+ public int CwdReferenceCountDeltaMinusOne => BitRange(365, 358);
+ public ReleaseMembarType ReleaseMembarType => (ReleaseMembarType)BitRange(366, 366);
+ public bool CwdReferenceCountIncrEnable => Bit(367);
+ public CwdMembarType CwdMembarType => (CwdMembarType)BitRange(369, 368);
+ public bool SequentiallyRunCtas => Bit(370);
+ public bool CwdReferenceCountDecrEnable => Bit(371);
+ public bool Throttled => Bit(372);
+ public Fp32NanBehavior Fp32NanBehavior => (Fp32NanBehavior)BitRange(376, 376);
+ public Fp32F2iNanBehavior Fp32F2iNanBehavior => (Fp32F2iNanBehavior)BitRange(377, 377);
+ public ApiVisibleCallLimit ApiVisibleCallLimit => (ApiVisibleCallLimit)BitRange(378, 378);
+ public SharedMemoryBankMapping SharedMemoryBankMapping => (SharedMemoryBankMapping)BitRange(379, 379);
+ public SamplerIndex SamplerIndex => (SamplerIndex)BitRange(382, 382);
+ public Fp32NarrowInstruction Fp32NarrowInstruction => (Fp32NarrowInstruction)BitRange(383, 383);
+ public int CtaRasterWidth => BitRange(415, 384);
+ public int CtaRasterHeight => BitRange(431, 416);
+ public int CtaRasterDepth => BitRange(447, 432);
+ public int CtaRasterWidthResume => BitRange(479, 448);
+ public int CtaRasterHeightResume => BitRange(495, 480);
+ public int CtaRasterDepthResume => BitRange(511, 496);
+ public int QueueEntriesPerCtaMinusOne => BitRange(518, 512);
+ public int CoalesceWaitingPeriod => BitRange(529, 522);
+ public int SharedMemorySize => BitRange(561, 544);
+ public int QmdReservedG => BitRange(575, 562);
+ public int QmdVersion => BitRange(579, 576);
+ public int QmdMajorVersion => BitRange(583, 580);
+ public int QmdReservedH => BitRange(591, 584);
+ public int CtaThreadDimension0 => BitRange(607, 592);
+ public int CtaThreadDimension1 => BitRange(623, 608);
+ public int CtaThreadDimension2 => BitRange(639, 624);
+ // One valid bit per constant buffer, starting at bit 640. QmdReservedI starts at bit 648,
+ // which leaves room for indices 0 to 7. The index itself is not range-checked here.
+ public bool ConstantBufferValid(int i) => Bit(640 + i * 1);
+ public int QmdReservedI => BitRange(668, 648);
+ public L1Configuration L1Configuration => (L1Configuration)BitRange(671, 669);
+ public int SmDisableMaskLower => BitRange(703, 672);
+ public int SmDisableMaskUpper => BitRange(735, 704);
+ public int Release0AddressLower => BitRange(767, 736);
+ public int Release0AddressUpper => BitRange(775, 768);
+ public int QmdReservedJ => BitRange(783, 776);
+ public ReductionOp Release0ReductionOp => (ReductionOp)BitRange(790, 788);
+ public bool QmdReservedK => Bit(791);
+ public ReductionFormat Release0ReductionFormat => (ReductionFormat)BitRange(793, 792);
+ public bool Release0ReductionEnable => Bit(794);
+ public StructureSize Release0StructureSize => (StructureSize)BitRange(799, 799);
+ public int Release0Payload => BitRange(831, 800);
+ public int Release1AddressLower => BitRange(863, 832);
+ public int Release1AddressUpper => BitRange(871, 864);
+ public int QmdReservedL => BitRange(879, 872);
+ public ReductionOp Release1ReductionOp => (ReductionOp)BitRange(886, 884);
+ public bool QmdReservedM => Bit(887);
+ public ReductionFormat Release1ReductionFormat => (ReductionFormat)BitRange(889, 888);
+ public bool Release1ReductionEnable => Bit(890);
+ public StructureSize Release1StructureSize => (StructureSize)BitRange(895, 895);
+ public int Release1Payload => BitRange(927, 896);
+ // Constant buffer descriptors are 64 bits each and start at bit 928, so the eight
+ // descriptors for indices 0 to 7 end right before ShaderLocalMemoryLowSize (bit 1440).
+ // A multiple of 64 is added to both ends of each range, so a range never crosses a word.
+ public int ConstantBufferAddrLower(int i) => BitRange(959 + i * 64, 928 + i * 64);
+ public int ConstantBufferAddrUpper(int i) => BitRange(967 + i * 64, 960 + i * 64);
+ public int ConstantBufferReservedAddr(int i) => BitRange(973 + i * 64, 968 + i * 64);
+ public bool ConstantBufferInvalidate(int i) => Bit(974 + i * 64);
+ public int ConstantBufferSize(int i) => BitRange(991 + i * 64, 975 + i * 64);
+ public int ShaderLocalMemoryLowSize => BitRange(1463, 1440);
+ public int QmdReservedN => BitRange(1466, 1464);
+ public int BarrierCount => BitRange(1471, 1467);
+ public int ShaderLocalMemoryHighSize => BitRange(1495, 1472);
+ public int RegisterCount => BitRange(1503, 1496);
+ public int ShaderLocalMemoryCrsSize => BitRange(1527, 1504);
+ public int SassVersion => BitRange(1535, 1528);
+ public int HwOnlyInnerGet => BitRange(1566, 1536);
+ public bool HwOnlyRequireSchedulingPcas => Bit(1567);
+ public int HwOnlyInnerPut => BitRange(1598, 1568);
+ public bool HwOnlyScgType => Bit(1599);
+ public int HwOnlySpanListHeadIndex => BitRange(1629, 1600);
+ public bool QmdReservedQ => Bit(1630);
+ public bool HwOnlySpanListHeadIndexValid => Bit(1631);
+ public int HwOnlySkedNextQmdPointer => BitRange(1663, 1632);
+ public int QmdSpareE => BitRange(1695, 1664);
+ public int QmdSpareF => BitRange(1727, 1696);
+ public int QmdSpareG => BitRange(1759, 1728);
+ public int QmdSpareH => BitRange(1791, 1760);
+ public int QmdSpareI => BitRange(1823, 1792);
+ public int QmdSpareJ => BitRange(1855, 1824);
+ public int QmdSpareK => BitRange(1887, 1856);
+ public int QmdSpareL => BitRange(1919, 1888);
+ public int QmdSpareM => BitRange(1951, 1920);
+ public int QmdSpareN => BitRange(1983, 1952);
+ public int DebugIdUpper => BitRange(2015, 1984);
+ public int DebugIdLower => BitRange(2047, 2016);
+
+ /// <summary>
+ /// Reads a single bit of the structure.
+ /// </summary>
+ /// <param name="bit">Absolute index of the bit, from 0 to 2047</param>
+ /// <returns>True if the bit is set, false otherwise</returns>
+ /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="bit"/> is outside of the structure</exception>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private bool Bit(int bit)
+ {
+ // The unsigned comparison also rejects negative indices. The fixed buffer access
+ // below is not bounds-checked, so this is the only thing keeping it in range.
+ if ((uint)bit >= 64 * 32)
+ {
+ throw new ArgumentOutOfRangeException(nameof(bit));
+ }
+
+ return (_words[bit >> 5] & (1 << (bit & 31))) != 0;
+ }
+
+ /// <summary>
+ /// Reads an inclusive range of bits of the structure.
+ /// </summary>
+ /// <remarks>
+ /// Only one 32-bit word is read, so both ends of the range must be inside the same word.
+ /// </remarks>
+ /// <param name="upper">Absolute index of the most significant bit of the range</param>
+ /// <param name="lower">Absolute index of the least significant bit of the range, from 0 to 2047</param>
+ /// <returns>Value of the bit range, shifted down so that <paramref name="lower"/> becomes bit 0</returns>
+ /// <exception cref="ArgumentOutOfRangeException">
+ /// Thrown when <paramref name="lower"/> is outside of the structure, or when
+ /// <paramref name="upper"/> is below <paramref name="lower"/> or on a different word
+ /// </exception>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private int BitRange(int upper, int lower)
+ {
+ // The unsigned comparison also rejects negative indices. The fixed buffer access
+ // below is not bounds-checked, so this is the only thing keeping it in range.
+ if ((uint)lower >= 64 * 32)
+ {
+ throw new ArgumentOutOfRangeException(nameof(lower));
+ }
+
+ // An inverted range would give a shift count of 32 or more below, which C# reduces
+ // modulo 32 and so yields a wrong mask instead of failing. A range that crosses a
+ // word would silently lose its upper bits, as a single word is read. Reject both.
+ if (upper < lower || (upper >> 5) != (lower >> 5))
+ {
+ throw new ArgumentOutOfRangeException(nameof(upper));
+ }
+
+ int mask = (int)(uint.MaxValue >> (32 - (upper - lower + 1)));
+
+ // The right shift is arithmetic because the words are signed, but the mask removes
+ // every sign-extended bit above the width of the field.
+ return (_words[lower >> 5] >> (lower & 31)) & mask;
+ }
+ }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj b/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
index b9751508ef..a55c4d1ceb 100644
--- a/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
+++ b/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
@@ -13,4 +13,12 @@
win-x64;osx-x64;linux-x64
+
+ true
+
+
+
+ true
+
+
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index dad1b0ac2e..8aa9b1c7b5 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -51,12 +51,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// This automatically translates, compiles and adds the code to the cache if not present.
///
/// GPU virtual address of the binary shader code
- /// Shared memory size of the compute shader
/// Local group size X of the computer shader
/// Local group size Y of the computer shader
/// Local group size Z of the computer shader
+ /// <param name="localMemorySize">Local memory size of the compute shader</param>
+ /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
/// Compiled compute shader code
- public ComputeShader GetComputeShader(ulong gpuVa, int sharedMemorySize, int localSizeX, int localSizeY, int localSizeZ)
+ public ComputeShader GetComputeShader(
+ ulong gpuVa,
+ int localSizeX,
+ int localSizeY,
+ int localSizeZ,
+ int localMemorySize,
+ int sharedMemorySize)
{
bool isCached = _cpPrograms.TryGetValue(gpuVa, out List list);
@@ -71,7 +78,13 @@ namespace Ryujinx.Graphics.Gpu.Shader
}
}
- CachedShader shader = TranslateComputeShader(gpuVa, sharedMemorySize, localSizeX, localSizeY, localSizeZ);
+ CachedShader shader = TranslateComputeShader(
+ gpuVa,
+ localSizeX,
+ localSizeY,
+ localSizeZ,
+ localMemorySize,
+ sharedMemorySize);
shader.HostShader = _context.Renderer.CompileShader(shader.Program);
@@ -237,12 +250,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// Translates the binary Maxwell shader code to something that the host API accepts.
///
/// GPU virtual address of the binary shader code
- /// Shared memory size of the compute shader
/// Local group size X of the computer shader
/// Local group size Y of the computer shader
/// Local group size Z of the computer shader
+ /// <param name="localMemorySize">Local memory size of the compute shader</param>
+ /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
/// Compiled compute shader code
- private CachedShader TranslateComputeShader(ulong gpuVa, int sharedMemorySize, int localSizeX, int localSizeY, int localSizeZ)
+ private CachedShader TranslateComputeShader(
+ ulong gpuVa,
+ int localSizeX,
+ int localSizeY,
+ int localSizeZ,
+ int localMemorySize,
+ int sharedMemorySize)
{
if (gpuVa == 0)
{
@@ -256,6 +276,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
QueryInfoName.ComputeLocalSizeX => localSizeX,
QueryInfoName.ComputeLocalSizeY => localSizeY,
QueryInfoName.ComputeLocalSizeZ => localSizeZ,
+ QueryInfoName.ComputeLocalMemorySize => localMemorySize,
QueryInfoName.ComputeSharedMemorySize => sharedMemorySize,
_ => QueryInfoCommon(info)
};
diff --git a/Ryujinx.Graphics.OpenGL/Program.cs b/Ryujinx.Graphics.OpenGL/Program.cs
index a8ee7ae895..fe14e9a9db 100644
--- a/Ryujinx.Graphics.OpenGL/Program.cs
+++ b/Ryujinx.Graphics.OpenGL/Program.cs
@@ -77,14 +77,7 @@ namespace Ryujinx.Graphics.OpenGL
Bind();
- int extraBlockindex = GL.GetUniformBlockIndex(Handle, "Extra");
-
- if (extraBlockindex >= 0)
- {
- GL.UniformBlockBinding(Handle, extraBlockindex, 0);
- }
-
- int ubBindingPoint = 1;
+ int ubBindingPoint = 0;
int sbBindingPoint = 0;
int textureUnit = 0;
int imageUnit = 0;
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
index 200569c48e..2e7f9f1b07 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
@@ -47,25 +47,35 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
context.AppendLine();
}
- context.AppendLine("layout (std140) uniform Extra");
-
- context.EnterScope();
-
- context.AppendLine("vec2 flip;");
- context.AppendLine("int instance;");
-
- context.LeaveScope(";");
-
- context.AppendLine();
-
- context.AppendLine($"uint {DefaultNames.LocalMemoryName}[0x100];");
- context.AppendLine();
-
if (context.Config.Stage == ShaderStage.Compute)
{
- string size = NumberFormatter.FormatInt(BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeSharedMemorySize), 4));
+ int localMemorySize = BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeLocalMemorySize), 4);
- context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{size}];");
+ if (localMemorySize != 0)
+ {
+ string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
+
+ context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
+ context.AppendLine();
+ }
+
+ int sharedMemorySize = BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeSharedMemorySize), 4);
+
+ if (sharedMemorySize != 0)
+ {
+ string sharedMemorySizeStr = NumberFormatter.FormatInt(sharedMemorySize);
+
+ context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{sharedMemorySizeStr}];");
+ context.AppendLine();
+ }
+ }
+ else if (context.Config.LocalMemorySize != 0)
+ {
+ int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4);
+
+ string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
+
+ context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
context.AppendLine();
}
diff --git a/Ryujinx.Graphics.Shader/QueryInfoName.cs b/Ryujinx.Graphics.Shader/QueryInfoName.cs
index c4f2cb6cc2..887c0d7d14 100644
--- a/Ryujinx.Graphics.Shader/QueryInfoName.cs
+++ b/Ryujinx.Graphics.Shader/QueryInfoName.cs
@@ -5,6 +5,7 @@ namespace Ryujinx.Graphics.Shader
ComputeLocalSizeX,
ComputeLocalSizeY,
ComputeLocalSizeZ,
+ ComputeLocalMemorySize,
ComputeSharedMemorySize,
IsTextureBuffer,
IsTextureRectangle,
diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
index 8a0f25fe45..e3708b41d6 100644
--- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
+++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
@@ -10,6 +10,8 @@ namespace Ryujinx.Graphics.Shader.Translation
public int MaxOutputVertices { get; }
+ public int LocalMemorySize { get; }
+
public OutputMapTarget[] OmapTargets { get; }
public bool OmapSampleMask { get; }
public bool OmapDepth { get; }
@@ -23,6 +25,7 @@ namespace Ryujinx.Graphics.Shader.Translation
Stage = ShaderStage.Compute;
OutputTopology = OutputTopology.PointList;
MaxOutputVertices = 0;
+ LocalMemorySize = 0;
OmapTargets = null;
OmapSampleMask = false;
OmapDepth = false;
@@ -35,6 +38,7 @@ namespace Ryujinx.Graphics.Shader.Translation
Stage = header.Stage;
OutputTopology = header.OutputTopology;
MaxOutputVertices = header.MaxOutputVertexCount;
+ LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;
OmapTargets = header.OmapTargets;
OmapSampleMask = header.OmapSampleMask;
OmapDepth = header.OmapDepth;
@@ -80,6 +84,8 @@ namespace Ryujinx.Graphics.Shader.Translation
case QueryInfoName.ComputeLocalSizeY:
case QueryInfoName.ComputeLocalSizeZ:
return 1;
+ case QueryInfoName.ComputeLocalMemorySize:
+ return 0x1000;
case QueryInfoName.ComputeSharedMemorySize:
return 0xc000;
case QueryInfoName.IsTextureBuffer: