From 59964f667c38d9d0550a3b5ef3970433493f4991 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Wed, 28 Nov 2018 21:09:44 -0200
Subject: [PATCH] =?UTF-8?q?Add=20support=20for=20bigger=20UBOs,=20fix=20sR?=
 =?UTF-8?q?GB=20regression,=20small=20improvement=20t=E2=80=A6=20(#503)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add support for bigger UBOs, fix sRGB regression, small improvement to the 2D copy engine

* Break into multiple lines

* Read fractions for source/step values on the 2D copy engine as well

* Use fixed point math for more speed

* Fix reinterpret when texture sizes are different
---
 Ryujinx.Graphics/Gal/OpenGL/OGLLimit.cs       | 12 +++++++
 Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs    | 22 +++++++++----
 .../Gal/OpenGL/OGLRenderTarget.cs             | 33 ++++++++++++++++---
 Ryujinx.Graphics/Gal/OpenGL/OGLRenderer.cs    |  6 +++-
 Ryujinx.Graphics/Gal/Shader/GlslDecl.cs       |  8 ++---
 Ryujinx.Graphics/NvGpuEngine2d.cs             | 33 ++++++++++++++-----
 6 files changed, 90 insertions(+), 24 deletions(-)
 create mode 100644 Ryujinx.Graphics/Gal/OpenGL/OGLLimit.cs

diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLLimit.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLLimit.cs
new file mode 100644
index 0000000000..6c385bc4a2
--- /dev/null
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLLimit.cs
@@ -0,0 +1,12 @@
+using OpenTK.Graphics.OpenGL;
+using System;
+
+namespace Ryujinx.Graphics.Gal.OpenGL
+{
+    static class OGLLimit
+    {
+        private static readonly Lazy<int> s_MaxUboSize = new Lazy<int>(() => GL.GetInteger(GetPName.MaxUniformBlockSize));
+        //GL_MAX_UNIFORM_BLOCK_SIZE, queried from the driver on first access and cached afterwards.
+        public static int MaxUboSize => s_MaxUboSize.Value;
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs
index ac12314cdf..eb7f958b92 100644
--- a/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs
@@ -77,17 +77,23 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
         private GalPipelineState Old;
 
-        private OGLConstBuffer Buffer;
-        private OGLRasterizer Rasterizer;
-        private OGLShader Shader;
+        private OGLConstBuffer  Buffer;
+        private OGLRenderTarget RenderTarget;
+        private OGLRasterizer   Rasterizer;
+        private OGLShader       Shader;
 
         private int VaoHandle;
 
-        public OGLPipeline(OGLConstBuffer Buffer, OGLRasterizer Rasterizer, OGLShader Shader)
+        public OGLPipeline(
+            OGLConstBuffer  Buffer,
+            OGLRenderTarget RenderTarget,
+            OGLRasterizer   Rasterizer,
+            OGLShader       Shader)
         {
-            this.Buffer     = Buffer;
-            this.Rasterizer = Rasterizer;
-            this.Shader     = Shader;
+            this.Buffer       = Buffer;
+            this.RenderTarget = RenderTarget;
+            this.Rasterizer   = Rasterizer;
+            this.Shader       = Shader;
 
             //These values match OpenGL's defaults
             Old = new GalPipelineState
@@ -144,6 +150,8 @@ namespace Ryujinx.Graphics.Gal.OpenGL
             if (New.FramebufferSrgb != Old.FramebufferSrgb)
             {
                 Enable(EnableCap.FramebufferSrgb, New.FramebufferSrgb);
+
+                RenderTarget.FramebufferSrgb = New.FramebufferSrgb;
             }
 
             if (New.FlipX != Old.FlipX || New.FlipY != Old.FlipY || New.Instance != Old.Instance)
diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs
index 8d04f1aae7..ce5364e154 100644
--- a/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs
@@ -90,6 +90,8 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
         private int CopyPBO;
 
+        public bool FramebufferSrgb { get; set; }
+
         public OGLRenderTarget(OGLTexture Texture)
         {
             Attachments = new FrameBufferAttachments();
@@ -363,11 +365,24 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
             GL.Clear(ClearBufferMask.ColorBufferBit);
 
+            GL.Disable(EnableCap.FramebufferSrgb);
+
             GL.BlitFramebuffer(
-                SrcX0, SrcY0, SrcX1, SrcY1,
-                DstX0, DstY0, DstX1, DstY1,
+                SrcX0,
+                SrcY0,
+                SrcX1,
+                SrcY1,
+                DstX0,
+                DstY0,
+                DstX1,
+                DstY1,
                 ClearBufferMask.ColorBufferBit,
                 BlitFramebufferFilter.Linear);
+
+            if (FramebufferSrgb)
+            {
+                GL.Enable(EnableCap.FramebufferSrgb);
+            }
         }
 
         public void Copy(
@@ -432,7 +447,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL
                 return;
             }
 
-            if (NewImage.Format == OldImage.Format)
+            if (NewImage.Format == OldImage.Format &&
+                NewImage.Width  == OldImage.Width  &&
+                NewImage.Height == OldImage.Height)
             {
                 return;
             }
@@ -444,7 +461,11 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
             GL.BindBuffer(BufferTarget.PixelPackBuffer, CopyPBO);
 
-            GL.BufferData(BufferTarget.PixelPackBuffer, Math.Max(ImageUtils.GetSize(OldImage), ImageUtils.GetSize(NewImage)), IntPtr.Zero, BufferUsageHint.StreamCopy);
+            //The buffer should be large enough to hold the largest texture.
+            int BufferSize = Math.Max(ImageUtils.GetSize(OldImage),
+                                      ImageUtils.GetSize(NewImage));
+
+            GL.BufferData(BufferTarget.PixelPackBuffer, BufferSize, IntPtr.Zero, BufferUsageHint.StreamCopy);
 
             if (!Texture.TryGetImageHandler(Key, out ImageHandler CachedImage))
             {
@@ -460,8 +481,12 @@ namespace Ryujinx.Graphics.Gal.OpenGL
             GL.BindBuffer(BufferTarget.PixelPackBuffer, 0);
             GL.BindBuffer(BufferTarget.PixelUnpackBuffer, CopyPBO);
 
+            GL.PixelStore(PixelStoreParameter.UnpackRowLength, OldImage.Width);
+
             Texture.Create(Key, ImageUtils.GetSize(NewImage), NewImage);
 
+            GL.PixelStore(PixelStoreParameter.UnpackRowLength, 0);
+
             GL.BindBuffer(BufferTarget.PixelUnpackBuffer, 0);
         }
 
diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLRenderer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLRenderer.cs
index a23541f3dd..14fb901809 100644
--- a/Ryujinx.Graphics/Gal/OpenGL/OGLRenderer.cs
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLRenderer.cs
@@ -31,7 +31,11 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
             Shader = new OGLShader(Buffer as OGLConstBuffer);
 
-            Pipeline = new OGLPipeline(Buffer as OGLConstBuffer, Rasterizer as OGLRasterizer, Shader as OGLShader);
+            Pipeline = new OGLPipeline(
+                Buffer       as OGLConstBuffer,
+                RenderTarget as OGLRenderTarget,
+                Rasterizer   as OGLRasterizer,
+                Shader       as OGLShader);
 
             ActionsQueue = new ConcurrentQueue<Action>();
         }
diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs
index c837632ec2..f1b63a8da3 100644
--- a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs
+++ b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs
@@ -1,3 +1,4 @@
+using Ryujinx.Graphics.Gal.OpenGL;
 using System;
 using System.Collections.Generic;
 
@@ -16,8 +17,6 @@ namespace Ryujinx.Graphics.Gal.Shader
         public const int VertexIdAttr    = 0x2fc;
         public const int FaceAttr        = 0x3fc;
 
-        public const int MaxUboSize = 1024;
-
         public const int GlPositionVec4Index = 7;
 
         public const int PositionOutAttrLocation = 15;
@@ -51,6 +50,8 @@ namespace Ryujinx.Graphics.Gal.Shader
         public const string SsyStackName = "ssy_stack";
         public const string SsyCursorName = "ssy_cursor";
 
+        public static int MaxUboSize => OGLLimit.MaxUboSize / 16;
+
         private string[] StagePrefixes = new string[] { "vp", "tcp", "tep", "gp", "fp" };
 
         private string StagePrefix;
@@ -98,8 +99,7 @@ namespace Ryujinx.Graphics.Gal.Shader
             m_Preds = new Dictionary<int, ShaderDeclInfo>();
         }
 
-        public GlslDecl(ShaderIrBlock[] Blocks, GalShaderType ShaderType, ShaderHeader Header)
-            : this(ShaderType)
+        public GlslDecl(ShaderIrBlock[] Blocks, GalShaderType ShaderType, ShaderHeader Header) : this(ShaderType)
         {
             StagePrefix = StagePrefixes[(int)ShaderType] + "_";
 
diff --git a/Ryujinx.Graphics/NvGpuEngine2d.cs b/Ryujinx.Graphics/NvGpuEngine2d.cs
index f20f8d6eeb..43fbcedf43 100644
--- a/Ryujinx.Graphics/NvGpuEngine2d.cs
+++ b/Ryujinx.Graphics/NvGpuEngine2d.cs
@@ -61,8 +61,11 @@ namespace Ryujinx.Graphics
             int DstBlitW = ReadRegister(NvGpuEngine2dReg.BlitDstW);
             int DstBlitH = ReadRegister(NvGpuEngine2dReg.BlitDstH);
 
-            int SrcBlitX = ReadRegister(NvGpuEngine2dReg.BlitSrcXInt);
-            int SrcBlitY = ReadRegister(NvGpuEngine2dReg.BlitSrcYInt);
+            long BlitDuDx = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitDuDxFract);
+            long BlitDvDy = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitDvDyFract);
+
+            long SrcBlitX = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitSrcXFract);
+            long SrcBlitY = ReadRegisterFixed1_31_32(NvGpuEngine2dReg.BlitSrcYFract);
 
             GalImageFormat SrcImgFormat = ImageUtils.ConvertSurface((GalSurfaceFormat)SrcFormat);
             GalImageFormat DstImgFormat = ImageUtils.ConvertSurface((GalSurfaceFormat)DstFormat);
@@ -99,13 +102,19 @@ namespace Ryujinx.Graphics
             Gpu.ResourceManager.SendTexture(Vmm, SrcKey, SrcTexture);
             Gpu.ResourceManager.SendTexture(Vmm, DstKey, DstTexture);
 
+            int SrcBlitX1 = (int)(SrcBlitX >> 32);
+            int SrcBlitY1 = (int)(SrcBlitY >> 32);
+
+            int SrcBlitX2 = (int)(SrcBlitX + DstBlitW * BlitDuDx >> 32);
+            int SrcBlitY2 = (int)(SrcBlitY + DstBlitH * BlitDvDy >> 32);
+
             Gpu.Renderer.RenderTarget.Copy(
                 SrcKey,
                 DstKey,
-                SrcBlitX,
-                SrcBlitY,
-                SrcBlitX + DstBlitW,
-                SrcBlitY + DstBlitH,
+                SrcBlitX1,
+                SrcBlitY1,
+                SrcBlitX2,
+                SrcBlitY2,
                 DstBlitX,
                 DstBlitY,
                 DstBlitX + DstBlitW,
@@ -121,8 +130,8 @@ namespace Ryujinx.Graphics
                 DstTexture,
                 SrcAddress,
                 DstAddress,
-                SrcBlitX,
-                SrcBlitY,
+                SrcBlitX1,
+                SrcBlitY1,
                 DstBlitX,
                 DstBlitY,
                 DstBlitW,
@@ -150,6 +159,14 @@ namespace Ryujinx.Graphics
             Registers[MethCall.Method] = MethCall.Argument;
         }
 
+        private long ReadRegisterFixed1_31_32(NvGpuEngine2dReg Reg)
+        {
+            //Reg supplies bits 0-31 and Reg + 1 bits 32-63; both are zero-extended first.
+            long LowerHalf = (uint)ReadRegister(Reg);
+            long UpperHalf = (uint)ReadRegister(Reg + 1);
+            return (UpperHalf << 32) | LowerHalf;
+        }
+
         private int ReadRegister(NvGpuEngine2dReg Reg)
         {
             return Registers[(int)Reg];