From 09dfefed1f84585e2b305cd16482f899b93fe629 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 26 Jun 2018 02:10:54 -0300
Subject: [PATCH] Implementation of UBOs instead of uniform constant arrays
 (#186)

* Sort uniform bindings to avoid possible failures in drivers with fewer bindings

* Throw exception for Cbuf overflow

* Search for free bindings instead of using locked ones

* EnsureAllocated when binding buffers

* Fix uniform bindings

* Remove spaces

* Use 64 KiB UBOs when available

* Remove double colon

* Use IdentationStr and avoid division in Cbuf offset

* Add spaces
---
 Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs      | 116 +++++++++++++++---
 .../Gal/OpenGL/OGLStreamBuffer.cs             | 113 +++++++++++++++++
 Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs |  14 ++-
 3 files changed, 223 insertions(+), 20 deletions(-)
 create mode 100644 Ryujinx.Graphics/Gal/OpenGL/OGLStreamBuffer.cs

diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs
index 5760d172af..28fa8728ef 100644
--- a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs
@@ -69,6 +69,8 @@ namespace Ryujinx.Graphics.Gal.OpenGL
             public ShaderStage Fragment;
         }
 
+        const int ConstBuffersPerStage = 18;
+
         private ShaderProgram Current;
 
         private ConcurrentDictionary<long, ShaderStage> Stages;
@@ -77,11 +79,20 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
         public int CurrentProgramHandle { get; private set; }
 
+        private OGLStreamBuffer[][] ConstBuffers;
+
         public OGLShader()
         {
             Stages = new ConcurrentDictionary<long, ShaderStage>();
 
             Programs = new Dictionary<ShaderProgram, int>();
+
+            ConstBuffers = new OGLStreamBuffer[5][];
+
+            for (int i = 0; i < 5; i++)
+            {
+                ConstBuffers[i] = new OGLStreamBuffer[ConstBuffersPerStage];
+            }
         }
 
         public void Create(IGalMemory Memory, long Key, GalShaderType Type)
@@ -119,27 +130,19 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
         public void SetConstBuffer(long Key, int Cbuf, byte[] Data)
         {
-            BindProgram();
-
             if (Stages.TryGetValue(Key, out ShaderStage Stage))
             {
                 foreach (ShaderDeclInfo DeclInfo in Stage.UniformUsage.Where(x => x.Cbuf == Cbuf))
                 {
-                    int Location = GL.GetUniformLocation(CurrentProgramHandle, DeclInfo.Name);
+                    OGLStreamBuffer Buffer = GetConstBuffer(Stage.Type, Cbuf);
 
-                    int Count = Data.Length >> 2;
+                    int Size = Math.Min(Data.Length, Buffer.Size);
 
-                    //The Index is the index of the last element,
-                    //so we can add 1 to get the uniform array size.
-                    Count = Math.Min(Count, DeclInfo.Index + 1);
+                    byte[] Destiny = Buffer.Map(Size);
 
-                    unsafe
-                    {
-                        fixed (byte* Ptr = Data)
-                        {
-                            GL.Uniform1(Location, Count, (float*)Ptr);
-                        }
-                    }
+                    Array.Copy(Data, Destiny, Size);
+
+                    Buffer.Unmap(Size);
                 }
             }
         }
@@ -204,11 +207,15 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
                 CheckProgramLink(Handle);
 
+                BindUniformBlocks(Handle);
+
                 Programs.Add(Current, Handle);
             }
 
             GL.UseProgram(Handle);
 
+            BindUniformBuffers(Handle);
+
             CurrentProgramHandle = Handle;
         }
 
@@ -222,6 +229,87 @@ namespace Ryujinx.Graphics.Gal.OpenGL
             }
         }
 
+        private void BindUniformBlocks(int ProgramHandle)
+        {
+            int FreeBinding = 0;
+
+            int BindUniformBlocksIfNotNull(ShaderStage Stage)
+            {
+                if (Stage != null)
+                {
+                    foreach (ShaderDeclInfo DeclInfo in Stage.UniformUsage)
+                    {
+                        int BlockIndex = GL.GetUniformBlockIndex(ProgramHandle, DeclInfo.Name);
+
+                        if (BlockIndex < 0)
+                        {
+                            //The block is expected to be found; if it is not, the driver may be malfunctioning
+                            throw new InvalidOperationException();
+                        }
+
+                        GL.UniformBlockBinding(ProgramHandle, BlockIndex, FreeBinding);
+
+                        FreeBinding++;
+                    }
+                }
+
+                return FreeBinding;
+            }
+
+            BindUniformBlocksIfNotNull(Current.Vertex);
+            BindUniformBlocksIfNotNull(Current.TessControl);
+            BindUniformBlocksIfNotNull(Current.TessEvaluation);
+            BindUniformBlocksIfNotNull(Current.Geometry);
+            BindUniformBlocksIfNotNull(Current.Fragment);
+        }
+
+        private void BindUniformBuffers(int ProgramHandle)
+        {
+            int FreeBinding = 0;
+
+            int BindUniformBuffersIfNotNull(ShaderStage Stage)
+            {
+                if (Stage != null)
+                {
+                    foreach (ShaderDeclInfo DeclInfo in Stage.UniformUsage)
+                    {
+                        OGLStreamBuffer Buffer = GetConstBuffer(Stage.Type, DeclInfo.Cbuf);
+
+                        GL.BindBufferBase(BufferRangeTarget.UniformBuffer, FreeBinding, Buffer.Handle);
+
+                        FreeBinding++;
+                    }
+                }
+
+                return FreeBinding;
+            }
+            
+            BindUniformBuffersIfNotNull(Current.Vertex);
+            BindUniformBuffersIfNotNull(Current.TessControl);
+            BindUniformBuffersIfNotNull(Current.TessEvaluation);
+            BindUniformBuffersIfNotNull(Current.Geometry);
+            BindUniformBuffersIfNotNull(Current.Fragment);
+        }
+
+        private OGLStreamBuffer GetConstBuffer(GalShaderType StageType, int Cbuf)
+        {
+            int StageIndex = (int)StageType;
+
+            OGLStreamBuffer Buffer = ConstBuffers[StageIndex][Cbuf];
+
+            if (Buffer == null)
+            {
+                //Allocate a maximum of 64 KiB
+                int Size = Math.Min(GL.GetInteger(GetPName.MaxUniformBlockSize), 64 * 1024);
+
+                Buffer = OGLStreamBuffer.Create(BufferTarget.UniformBuffer, Size);
+
+                ConstBuffers[StageIndex][Cbuf] = Buffer;
+            }
+
+            return Buffer;
+        }
+
         public static void CompileAndCheck(int Handle, string Code)
         {
             GL.ShaderSource(Handle, Code);
diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLStreamBuffer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLStreamBuffer.cs
new file mode 100644
index 0000000000..3d91b09f78
--- /dev/null
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLStreamBuffer.cs
@@ -0,0 +1,113 @@
+using System;
+using OpenTK.Graphics.OpenGL;
+
+namespace Ryujinx.Graphics.Gal.OpenGL
+{
+    abstract class OGLStreamBuffer : IDisposable
+    {
+        public int Handle { get; protected set; }
+
+        public int Size { get; protected set; }
+
+        protected BufferTarget Target { get; private set; }
+
+        private bool Mapped = false;
+
+        public OGLStreamBuffer(BufferTarget Target, int MaxSize)
+        {
+            Handle = 0;
+            Mapped = false;
+
+            this.Target = Target;
+            this.Size = MaxSize;
+        }
+
+        public static OGLStreamBuffer Create(BufferTarget Target, int MaxSize)
+        {
+            //TODO: Query here for ARB_buffer_storage and use when available
+            return new SubDataBuffer(Target, MaxSize);
+        }
+
+        public byte[] Map(int Size)
+        {
+            if (Handle == 0 || Mapped || Size > this.Size)
+            {
+                throw new InvalidOperationException();
+            }
+
+            byte[] Memory = InternMap(Size);
+
+            Mapped = true;
+
+            return Memory;
+        }
+
+        public void Unmap(int UsedSize)
+        {
+            if (Handle == 0 || !Mapped)
+            {
+                throw new InvalidOperationException();
+            }
+
+            InternUnmap(UsedSize);
+
+            Mapped = false;
+        }
+
+        protected abstract byte[] InternMap(int Size);
+
+        protected abstract void InternUnmap(int UsedSize);
+
+        public void Dispose()
+        {
+            Dispose(true);
+        }
+
+        protected virtual void Dispose(bool Disposing)
+        {
+            if (Disposing && Handle != 0)
+            {
+                GL.DeleteBuffer(Handle);
+
+                Handle = 0;
+            }
+        }
+    }
+
+    class SubDataBuffer : OGLStreamBuffer
+    {
+        private byte[] Memory;
+
+        public SubDataBuffer(BufferTarget Target, int MaxSize)
+            : base(Target, MaxSize)
+        {
+            Memory = new byte[MaxSize];
+
+            GL.CreateBuffers(1, out int Handle);
+
+            GL.BindBuffer(Target, Handle);
+
+            GL.BufferData(Target, Size, IntPtr.Zero, BufferUsageHint.StreamDraw);
+
+            this.Handle = Handle;
+        }
+
+        protected override byte[] InternMap(int Size)
+        {
+            return Memory;
+        }
+
+        protected override void InternUnmap(int UsedSize)
+        {
+            GL.BindBuffer(Target, Handle);
+            
+            unsafe
+            {
+                fixed (byte* MemoryPtr = Memory)
+                {
+                    GL.BufferSubData(Target, IntPtr.Zero, UsedSize, (IntPtr)MemoryPtr);
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs
index 24db303f28..576358c79c 100644
--- a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs
+++ b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs
@@ -145,7 +145,9 @@ namespace Ryujinx.Graphics.Gal.Shader
 
             foreach (ShaderDeclInfo DeclInfo in Decl.Uniforms.Values.OrderBy(DeclKeySelector))
             {
-                SB.AppendLine($"uniform {GetDecl(DeclInfo)}[{DeclInfo.Index + 1}];");
+                SB.AppendLine($"layout (std140) uniform {DeclInfo.Name} {{");
+                SB.AppendLine($"{IdentationStr}vec4 {DeclInfo.Name}_data[{DeclInfo.Index / 4 + 1}];");
+                SB.AppendLine($"}};");
             }
 
             if (Decl.Uniforms.Count > 0)
@@ -530,15 +532,15 @@ namespace Ryujinx.Graphics.Gal.Shader
 
             if (Cbuf.Offs != null)
             {
-                //Note: We assume that the register value is always a multiple of 4.
-                //This may not be aways the case.
-                string Offset = "(floatBitsToInt(" + GetSrcExpr(Cbuf.Offs) + ") >> 2)";
+                string Offset = "floatBitsToInt(" + GetSrcExpr(Cbuf.Offs) + ")";
 
-                return DeclInfo.Name + "[" + Cbuf.Pos + " + " + Offset + "]";
+                string Index = "(" + Cbuf.Pos * 4 + " + " + Offset + ")";
+
+                return $"{DeclInfo.Name}_data[{Index} / 16][({Index} / 4) % 4]";
             }
             else
             {
-                return DeclInfo.Name + "[" + Cbuf.Pos + "]";
+                return $"{DeclInfo.Name}_data[{Cbuf.Pos / 4}][{Cbuf.Pos % 4}]";
             }
         }