From d2bb458b51bbcbc097f8f53ac2a3b8b15a723a45 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Sat, 17 Nov 2018 02:01:31 -0200
Subject: [PATCH] Improved GPU command lists decoding (#499)

* Better implementation of the DMA pusher, misc fixes

* Remove some debug code

* Correct RGBX8 format

* Add support for linked Texture Sampler Control

* Attempt to fix upside down screen issue
---
 ChocolArm64/Memory/MemoryManager.cs           |  26 ++-
 Ryujinx.Graphics/DmaPusher.cs                 | 190 ++++++++++++++++++
 Ryujinx.Graphics/Gal/GalImageFormat.cs        |   2 +
 .../Gal/OpenGL/OGLEnumConverter.cs            |   4 +-
 .../Gal/OpenGL/OGLRenderTarget.cs             |   2 -
 Ryujinx.Graphics/GpuMethodCall.cs             |  24 +++
 Ryujinx.Graphics/GpuResourceManager.cs        |   7 +-
 Ryujinx.Graphics/INvGpuEngine.cs              |   2 +-
 Ryujinx.Graphics/MacroInterpreter.cs          |  16 +-
 Ryujinx.Graphics/Memory/NvGpuPBEntry.cs       |  23 ---
 Ryujinx.Graphics/Memory/NvGpuPushBuffer.cs    | 101 ----------
 Ryujinx.Graphics/Memory/NvGpuVmmCache.cs      |  82 ++++++--
 Ryujinx.Graphics/NvGpu.cs                     |   6 +-
 Ryujinx.Graphics/NvGpuEngine2d.cs             |  79 +++++---
 Ryujinx.Graphics/NvGpuEngine3d.cs             | 103 ++++------
 Ryujinx.Graphics/NvGpuEngine3dReg.cs          |   2 +
 Ryujinx.Graphics/NvGpuEngineM2mf.cs           |  23 +--
 Ryujinx.Graphics/NvGpuEngineP2mf.cs           | 107 +++++++---
 Ryujinx.Graphics/NvGpuFifo.cs                 | 145 ++++++-------
 Ryujinx.Graphics/NvGpuMethod.cs               |   2 +-
 Ryujinx.Graphics/Texture/ImageUtils.cs        |  47 ++++-
 Ryujinx.Graphics/ValueRangeSet.cs             |   2 +-
 Ryujinx.HLE/HOS/Horizon.cs                    |   2 +-
 .../Nv/NvHostChannel/NvHostChannelIoctl.cs    |  10 +-
 Ryujinx.HLE/Switch.cs                         |   4 +-
 25 files changed, 616 insertions(+), 395 deletions(-)
 create mode 100644 Ryujinx.Graphics/DmaPusher.cs
 create mode 100644 Ryujinx.Graphics/GpuMethodCall.cs
 delete mode 100644 Ryujinx.Graphics/Memory/NvGpuPBEntry.cs
 delete mode 100644 Ryujinx.Graphics/Memory/NvGpuPushBuffer.cs

diff --git a/ChocolArm64/Memory/MemoryManager.cs b/ChocolArm64/Memory/MemoryManager.cs
index 308dd17ec7..ef3fb00646 100644
--- a/ChocolArm64/Memory/MemoryManager.cs
+++ b/ChocolArm64/Memory/MemoryManager.cs
@@ -409,9 +409,31 @@ namespace ChocolArm64.Memory
 
         public void WriteBytes(long position, byte[] data)
         {
-            EnsureRangeIsValid(position, (uint)data.Length);
+            long endAddr = position + data.Length;
 
-            Marshal.Copy(data, 0, (IntPtr)TranslateWrite(position), data.Length);
+            if ((ulong)endAddr < (ulong)position)
+            {
+                throw new ArgumentOutOfRangeException(nameof(position));
+            }
+
+            int offset = 0;
+
+            while ((ulong)position < (ulong)endAddr)
+            {
+                long pageLimit = (position + PageSize) & ~(long)PageMask;
+
+                if ((ulong)pageLimit > (ulong)endAddr)
+                {
+                    pageLimit = endAddr;
+                }
+
+                int copySize = (int)(pageLimit - position);
+
+                Marshal.Copy(data, offset, (IntPtr)TranslateWrite(position), copySize);
+
+                position += copySize;
+                offset   += copySize;
+            }
         }
 
         public void WriteBytes(long position, byte[] data, int startIndex, int size)
diff --git a/Ryujinx.Graphics/DmaPusher.cs b/Ryujinx.Graphics/DmaPusher.cs
new file mode 100644
index 0000000000..608d8a1d1a
--- /dev/null
+++ b/Ryujinx.Graphics/DmaPusher.cs
@@ -0,0 +1,190 @@
+using Ryujinx.Graphics.Memory;
+using System.Collections.Concurrent;
+using System.Threading;
+
+namespace Ryujinx.Graphics
+{
+    public class DmaPusher
+    {
+        private ConcurrentQueue<(NvGpuVmm, long)> IbBuffer;
+
+        private long DmaPut;
+        private long DmaGet;
+
+        private struct DmaState
+        {
+            public int  Method;
+            public int  SubChannel;
+            public int  MethodCount;
+            public bool NonIncrementing;
+            public bool IncrementOnce;
+            public int  LengthPending;
+        }
+
+        private DmaState State;
+
+        private bool SliEnable;
+        private bool SliActive;
+
+        private bool IbEnable;
+        private bool NonMain;
+
+        private long DmaMGet;
+
+        private NvGpuVmm Vmm;
+
+        private NvGpu Gpu;
+
+        private AutoResetEvent Event;
+
+        public DmaPusher(NvGpu Gpu)
+        {
+            this.Gpu = Gpu;
+
+            IbBuffer = new ConcurrentQueue<(NvGpuVmm, long)>();
+
+            IbEnable = true;
+
+            Event = new AutoResetEvent(false);
+        }
+
+        public void Push(NvGpuVmm Vmm, long Entry)
+        {
+            IbBuffer.Enqueue((Vmm, Entry));
+
+            Event.Set();
+        }
+
+        public bool WaitForCommands()
+        {
+            return Event.WaitOne(8);
+        }
+
+        public void DispatchCalls()
+        {
+            while (Step());
+        }
+
+        private bool Step()
+        {
+            if (DmaGet != DmaPut)
+            {
+                int Word = Vmm.ReadInt32(DmaGet);
+
+                DmaGet += 4;
+
+                if (!NonMain)
+                {
+                    DmaMGet = DmaGet;
+                }
+
+                if (State.LengthPending != 0)
+                {
+                    State.LengthPending = 0;
+                    State.MethodCount   = Word & 0xffffff;
+                }
+                else if (State.MethodCount != 0)
+                {
+                    if (!SliEnable || SliActive)
+                    {
+                        CallMethod(Word);
+                    }
+
+                    if (!State.NonIncrementing)
+                    {
+                        State.Method++;
+                    }
+
+                    if (State.IncrementOnce)
+                    {
+                        State.NonIncrementing = true;
+                    }
+
+                    State.MethodCount--;
+                }
+                else
+                {
+                    int SumissionMode = (Word >> 29) & 7;
+
+                    switch (SumissionMode)
+                    {
+                        case 1:
+                            //Incrementing.
+                            SetNonImmediateState(Word);
+
+                            State.NonIncrementing = false;
+                            State.IncrementOnce   = false;
+
+                            break;
+
+                        case 3:
+                            //Non-incrementing.
+                            SetNonImmediateState(Word);
+
+                            State.NonIncrementing = true;
+                            State.IncrementOnce   = false;
+
+                            break;
+
+                        case 4:
+                            //Immediate.
+                            State.Method          = (Word >> 0)  & 0x1fff;
+                            State.SubChannel      = (Word >> 13) & 7;
+                            State.NonIncrementing = true;
+                            State.IncrementOnce   = false;
+
+                            CallMethod((Word >> 16) & 0x1fff);
+
+                            break;
+
+                        case 5:
+                            //Increment-once.
+                            SetNonImmediateState(Word);
+
+                            State.NonIncrementing = false;
+                            State.IncrementOnce   = true;
+
+                            break;
+                    }
+                }
+            }
+            else if (IbEnable && IbBuffer.TryDequeue(out (NvGpuVmm Vmm, long Entry) Tuple))
+            {
+                this.Vmm = Tuple.Vmm;
+
+                long Entry = Tuple.Entry;
+
+                int Length = (int)(Entry >> 42) & 0x1fffff;
+
+                DmaGet = Entry & 0xfffffffffc;
+                DmaPut = DmaGet + Length * 4;
+
+                NonMain = (Entry & (1L << 41)) != 0;
+
+                Gpu.ResourceManager.ClearPbCache();
+            }
+            else
+            {
+                return false;
+            }
+
+            return true;
+        }
+
+        private void SetNonImmediateState(int Word)
+        {
+            State.Method      = (Word >> 0)  & 0x1fff;
+            State.SubChannel  = (Word >> 13) & 7;
+            State.MethodCount = (Word >> 16) & 0x1fff;
+        }
+
+        private void CallMethod(int Argument)
+        {
+            Gpu.Fifo.CallMethod(Vmm, new GpuMethodCall(
+                State.Method,
+                Argument,
+                State.SubChannel,
+                State.MethodCount));
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics/Gal/GalImageFormat.cs b/Ryujinx.Graphics/Gal/GalImageFormat.cs
index 2712cbc253..83c7f5697b 100644
--- a/Ryujinx.Graphics/Gal/GalImageFormat.cs
+++ b/Ryujinx.Graphics/Gal/GalImageFormat.cs
@@ -28,6 +28,7 @@ namespace Ryujinx.Graphics.Gal
         RGB5A1,
         R8,
         RG8,
+        RGBX8,
         RGBA8,
         BGRA8,
         RGB10A2,
@@ -39,6 +40,7 @@ namespace Ryujinx.Graphics.Gal
         RGBA32,
         R11G11B10,
         D16,
+        D24,
         D32,
         D24S8,
         D32S8,
diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs
index b499f18d0c..918163be49 100644
--- a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs
@@ -139,6 +139,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
                 case GalImageFormat.RG32      | GalImageFormat.Float: return (PixelInternalFormat.Rg32f,        PixelFormat.Rg,          PixelType.Float);
                 case GalImageFormat.RG32      | GalImageFormat.Sint:  return (PixelInternalFormat.Rg32i,        PixelFormat.RgInteger,   PixelType.Int);
                 case GalImageFormat.RG32      | GalImageFormat.Uint:  return (PixelInternalFormat.Rg32ui,       PixelFormat.RgInteger,   PixelType.UnsignedInt);
+                case GalImageFormat.RGBX8     | GalImageFormat.Unorm: return (PixelInternalFormat.Rgb8,         PixelFormat.Rgba,        PixelType.UnsignedByte);
                 case GalImageFormat.RGBA8     | GalImageFormat.Snorm: return (PixelInternalFormat.Rgba8Snorm,   PixelFormat.Rgba,        PixelType.Byte);
                 case GalImageFormat.RGBA8     | GalImageFormat.Unorm: return (PixelInternalFormat.Rgba8,        PixelFormat.Rgba,        PixelType.UnsignedByte);
                 case GalImageFormat.RGBA8     | GalImageFormat.Sint:  return (PixelInternalFormat.Rgba8i,       PixelFormat.RgbaInteger, PixelType.Byte);
@@ -174,10 +175,11 @@ namespace Ryujinx.Graphics.Gal.OpenGL
                 case GalImageFormat.R8        | GalImageFormat.Unorm: return (PixelInternalFormat.R8,           PixelFormat.Red,         PixelType.UnsignedByte);
                 case GalImageFormat.R11G11B10 | GalImageFormat.Float: return (PixelInternalFormat.R11fG11fB10f, PixelFormat.Rgb,         PixelType.UnsignedInt10F11F11FRev);
 
+                case GalImageFormat.D16   | GalImageFormat.Unorm: return (PixelInternalFormat.DepthComponent16,  PixelFormat.DepthComponent, PixelType.UnsignedShort);
+                case GalImageFormat.D24   | GalImageFormat.Unorm: return (PixelInternalFormat.DepthComponent24,  PixelFormat.DepthComponent, PixelType.UnsignedInt);
                 case GalImageFormat.D24S8 | GalImageFormat.Uint:  return (PixelInternalFormat.Depth24Stencil8,   PixelFormat.DepthStencil,   PixelType.UnsignedInt248);
                 case GalImageFormat.D24S8 | GalImageFormat.Unorm: return (PixelInternalFormat.Depth24Stencil8,   PixelFormat.DepthStencil,   PixelType.UnsignedInt248);
                 case GalImageFormat.D32   | GalImageFormat.Float: return (PixelInternalFormat.DepthComponent32f, PixelFormat.DepthComponent, PixelType.Float);
-                case GalImageFormat.D16   | GalImageFormat.Unorm: return (PixelInternalFormat.DepthComponent16,  PixelFormat.DepthComponent, PixelType.UnsignedShort);
                 case GalImageFormat.D32S8 | GalImageFormat.Float: return (PixelInternalFormat.Depth32fStencil8,  PixelFormat.DepthStencil,   PixelType.Float32UnsignedInt248Rev);
             }
 
diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs
index 17fddfface..8d04f1aae7 100644
--- a/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs
@@ -421,8 +421,6 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
                 ClearBufferMask Mask = GetClearMask(SrcTex);
 
-                GL.Clear(Mask);
-
                 GL.BlitFramebuffer(SrcX0, SrcY0, SrcX1, SrcY1, DstX0, DstY0, DstX1, DstY1, Mask, Filter);
             }
         }
diff --git a/Ryujinx.Graphics/GpuMethodCall.cs b/Ryujinx.Graphics/GpuMethodCall.cs
new file mode 100644
index 0000000000..762d10f1d0
--- /dev/null
+++ b/Ryujinx.Graphics/GpuMethodCall.cs
@@ -0,0 +1,24 @@
+namespace Ryujinx.Graphics
+{
+    struct GpuMethodCall
+    {
+        public int Method      { get; private set; }
+        public int Argument    { get; private set; }
+        public int SubChannel  { get; private set; }
+        public int MethodCount { get; private set; }
+
+        public bool IsLastCall => MethodCount <= 1;
+
+        public GpuMethodCall(
+            int Method,
+            int Argument,
+            int SubChannel  = 0,
+            int MethodCount = 0)
+        {
+            this.Method      = Method;
+            this.Argument    = Argument;
+            this.SubChannel  = SubChannel;
+            this.MethodCount = MethodCount;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics/GpuResourceManager.cs b/Ryujinx.Graphics/GpuResourceManager.cs
index 71390a83ac..c3d697c5f9 100644
--- a/Ryujinx.Graphics/GpuResourceManager.cs
+++ b/Ryujinx.Graphics/GpuResourceManager.cs
@@ -117,7 +117,7 @@ namespace Ryujinx.Graphics
             return false;
         }
 
-        private bool MemoryRegionModified(NvGpuVmm Vmm, long Position, long Size, NvGpuBufferType Type)
+        public bool MemoryRegionModified(NvGpuVmm Vmm, long Position, long Size, NvGpuBufferType Type)
         {
             HashSet<long> Uploaded = UploadedKeys[(int)Type];
 
@@ -136,5 +136,10 @@ namespace Ryujinx.Graphics
                 UploadedKeys[Index].Clear();
             }
         }
+
+        public void ClearPbCache(NvGpuBufferType Type)
+        {
+            UploadedKeys[(int)Type].Clear();
+        }
     }
 }
diff --git a/Ryujinx.Graphics/INvGpuEngine.cs b/Ryujinx.Graphics/INvGpuEngine.cs
index 810303b9f2..3e79efd3d5 100644
--- a/Ryujinx.Graphics/INvGpuEngine.cs
+++ b/Ryujinx.Graphics/INvGpuEngine.cs
@@ -6,6 +6,6 @@ namespace Ryujinx.Graphics
     {
         int[] Registers { get; }
 
-        void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry);
+        void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall);
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics/MacroInterpreter.cs b/Ryujinx.Graphics/MacroInterpreter.cs
index 20e7895b4a..86831bae18 100644
--- a/Ryujinx.Graphics/MacroInterpreter.cs
+++ b/Ryujinx.Graphics/MacroInterpreter.cs
@@ -1,3 +1,4 @@
+using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.Memory;
 using System;
 using System.Collections.Generic;
@@ -388,14 +389,11 @@ namespace Ryujinx.Graphics
         {
             int Value;
 
-            //If we don't have any parameters in the FIFO,
-            //keep running the PFIFO engine until it writes the parameters.
-            while (!Fifo.TryDequeue(out Value))
+            if (!Fifo.TryDequeue(out Value))
             {
-                if (!PFifo.Step())
-                {
-                    return 0;
-                }
+                Logger.PrintWarning(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+                return 0;
             }
 
             return Value;
@@ -408,9 +406,9 @@ namespace Ryujinx.Graphics
 
         private void Send(NvGpuVmm Vmm, int Value)
         {
-            NvGpuPBEntry PBEntry = new NvGpuPBEntry(MethAddr, 0, Value);
+            GpuMethodCall MethCall = new GpuMethodCall(MethAddr, Value);
 
-            Engine.CallMethod(Vmm, PBEntry);
+            Engine.CallMethod(Vmm, MethCall);
 
             MethAddr += MethIncr;
         }
diff --git a/Ryujinx.Graphics/Memory/NvGpuPBEntry.cs b/Ryujinx.Graphics/Memory/NvGpuPBEntry.cs
deleted file mode 100644
index 6b93c16995..0000000000
--- a/Ryujinx.Graphics/Memory/NvGpuPBEntry.cs
+++ /dev/null
@@ -1,23 +0,0 @@
-using System;
-using System.Collections.ObjectModel;
-
-namespace Ryujinx.Graphics.Memory
-{
-    public struct NvGpuPBEntry
-    {
-        public int Method { get; private set; }
-
-        public int SubChannel { get; private set; }
-
-        private int[] m_Arguments;
-
-        public ReadOnlyCollection<int> Arguments => Array.AsReadOnly(m_Arguments);
-
-        public NvGpuPBEntry(int Method, int SubChannel, params int[] Arguments)
-        {
-            this.Method      = Method;
-            this.SubChannel  = SubChannel;
-            this.m_Arguments = Arguments;
-        }
-    }
-}
\ No newline at end of file
diff --git a/Ryujinx.Graphics/Memory/NvGpuPushBuffer.cs b/Ryujinx.Graphics/Memory/NvGpuPushBuffer.cs
deleted file mode 100644
index 0902ebfc92..0000000000
--- a/Ryujinx.Graphics/Memory/NvGpuPushBuffer.cs
+++ /dev/null
@@ -1,101 +0,0 @@
-using System.Collections.Generic;
-using System.IO;
-
-namespace Ryujinx.Graphics.Memory
-{
-    public static class NvGpuPushBuffer
-    {
-        private enum SubmissionMode
-        {
-            Incrementing    = 1,
-            NonIncrementing = 3,
-            Immediate       = 4,
-            IncrementOnce   = 5
-        }
-
-        public static NvGpuPBEntry[] Decode(byte[] Data)
-        {
-            using (MemoryStream MS = new MemoryStream(Data))
-            {
-                BinaryReader Reader = new BinaryReader(MS);
-
-                List<NvGpuPBEntry> PushBuffer = new List<NvGpuPBEntry>();
-
-                bool CanRead() => MS.Position + 4 <= MS.Length;
-
-                while (CanRead())
-                {
-                    int Packed = Reader.ReadInt32();
-
-                    int Meth = (Packed >> 0)  & 0x1fff;
-                    int SubC = (Packed >> 13) & 7;
-                    int Args = (Packed >> 16) & 0x1fff;
-                    int Mode = (Packed >> 29) & 7;
-
-                    switch ((SubmissionMode)Mode)
-                    {
-                        case SubmissionMode.Incrementing:
-                        {
-                            for (int Index = 0; Index < Args && CanRead(); Index++, Meth++)
-                            {
-                                PushBuffer.Add(new NvGpuPBEntry(Meth, SubC, Reader.ReadInt32()));
-                            }
-
-                            break;
-                        }
-
-                        case SubmissionMode.NonIncrementing:
-                        {
-                            int[] Arguments = new int[Args];
-
-                            for (int Index = 0; Index < Arguments.Length; Index++)
-                            {
-                                if (!CanRead())
-                                {
-                                    break;
-                                }
-
-                                Arguments[Index] = Reader.ReadInt32();
-                            }
-
-                            PushBuffer.Add(new NvGpuPBEntry(Meth, SubC, Arguments));
-
-                            break;
-                        }
-
-                        case SubmissionMode.Immediate:
-                        {
-                            PushBuffer.Add(new NvGpuPBEntry(Meth, SubC, Args));
-
-                            break;
-                        }
-
-                        case SubmissionMode.IncrementOnce:
-                        {
-                            if (CanRead())
-                            {
-                                PushBuffer.Add(new NvGpuPBEntry(Meth, SubC, Reader.ReadInt32()));
-                            }
-
-                            if (CanRead() && Args > 1)
-                            {
-                                int[] Arguments = new int[Args - 1];
-
-                                for (int Index = 0; Index < Arguments.Length && CanRead(); Index++)
-                                {
-                                    Arguments[Index] = Reader.ReadInt32();
-                                }
-
-                                PushBuffer.Add(new NvGpuPBEntry(Meth + 1, SubC, Arguments));
-                            }
-
-                            break;
-                        }
-                    }
-                }
-
-                return PushBuffer.ToArray();
-            }
-        }
-    }
-}
\ No newline at end of file
diff --git a/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs b/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs
index 57e25a2fce..dd6d37c9f1 100644
--- a/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs
+++ b/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs
@@ -5,27 +5,54 @@ namespace Ryujinx.Graphics.Memory
 {
     class NvGpuVmmCache
     {
-        private ValueRangeSet<int> CachedRanges;
+        private struct CachedResource
+        {
+            public long Key;
+            public int  Mask;
+
+            public CachedResource(long Key, int Mask)
+            {
+                this.Key  = Key;
+                this.Mask = Mask;
+            }
+
+            public override int GetHashCode()
+            {
+                return (int)(Key * 23 + Mask);
+            }
+
+            public override bool Equals(object obj)
+            {
+                return obj is CachedResource Cached && Equals(Cached);
+            }
+
+            public bool Equals(CachedResource other)
+            {
+                return Key == other.Key && Mask == other.Mask;
+            }
+        }
+
+        private ValueRangeSet<CachedResource> CachedRanges;
 
         public NvGpuVmmCache()
         {
-            CachedRanges = new ValueRangeSet<int>();
+            CachedRanges = new ValueRangeSet<CachedResource>();
         }
 
-        public bool IsRegionModified(MemoryManager Memory, NvGpuBufferType BufferType, long PA, long Size)
+        public bool IsRegionModified(MemoryManager Memory, NvGpuBufferType BufferType, long Start, long Size)
         {
-            (bool[] Modified, long ModifiedCount) = Memory.IsRegionModified(PA, Size);
+            (bool[] Modified, long ModifiedCount) = Memory.IsRegionModified(Start, Size);
 
             //Remove all modified ranges.
             int Index = 0;
 
-            long Position = PA & ~NvGpuVmm.PageMask;
+            long Position = Start & ~NvGpuVmm.PageMask;
 
             while (ModifiedCount > 0)
             {
                 if (Modified[Index++])
                 {
-                    CachedRanges.Remove(new ValueRange<int>(Position, Position + NvGpuVmm.PageSize));
+                    CachedRanges.Remove(new ValueRange<CachedResource>(Position, Position + NvGpuVmm.PageSize));
 
                     ModifiedCount--;
                 }
@@ -37,11 +64,19 @@ namespace Ryujinx.Graphics.Memory
             //If the region is not yet present on the list, then a new ValueRange
             //is directly added with the current resource type as the only bit set.
             //Otherwise, it just sets the bit for this new resource type on the current mask.
+            //The physical address of the resource is used as the key; those keys are used to keep
+            //track of resources that are already on the cache. A resource may be inside another
+            //resource, and in this case we should return true if the "sub-resource" was not
+            //yet cached.
             int Mask = 1 << (int)BufferType;
 
-            ValueRange<int> NewCached = new ValueRange<int>(PA, PA + Size);
+            CachedResource NewCachedValue = new CachedResource(Start, Mask);
 
-            ValueRange<int>[] Ranges = CachedRanges.GetAllIntersections(NewCached);
+            ValueRange<CachedResource> NewCached = new ValueRange<CachedResource>(Start, Start + Size);
+
+            ValueRange<CachedResource>[] Ranges = CachedRanges.GetAllIntersections(NewCached);
+
+            bool IsKeyCached = Ranges.Length > 0 && Ranges[0].Value.Key == Start;
 
             long LastEnd = NewCached.Start;
 
@@ -49,23 +84,36 @@ namespace Ryujinx.Graphics.Memory
 
             for (Index = 0; Index < Ranges.Length; Index++)
             {
-                ValueRange<int> Current = Ranges[Index];
+                ValueRange<CachedResource> Current = Ranges[Index];
+
+                CachedResource Cached = Current.Value;
 
                 long RgStart = Math.Max(Current.Start, NewCached.Start);
                 long RgEnd   = Math.Min(Current.End,   NewCached.End);
 
-                if ((Current.Value & Mask) == 0)
-                {
-                    CachedRanges.Add(new ValueRange<int>(RgStart, RgEnd, Current.Value | Mask));
-                }
-                else
+                if ((Cached.Mask & Mask) != 0)
                 {
                     Coverage += RgEnd - RgStart;
                 }
 
+                //Highest key value has priority; this prevents larger resources
+                //from completely invalidating smaller ones on the cache. For example,
+                //consider that a resource in the range [100, 200) was added, and then
+                //another one in the range [50, 200). We prevent the new resource from
+                //completely replacing the old one by splitting it like this:
+                //New resource key is added at [50, 100), old key is still present at [100, 200).
+                if (Cached.Key < Start)
+                {
+                    Cached.Key = Start;
+                }
+
+                Cached.Mask |= Mask;
+
+                CachedRanges.Add(new ValueRange<CachedResource>(RgStart, RgEnd, Cached));
+
                 if (RgStart > LastEnd)
                 {
-                    CachedRanges.Add(new ValueRange<int>(LastEnd, RgStart, Mask));
+                    CachedRanges.Add(new ValueRange<CachedResource>(LastEnd, RgStart, NewCachedValue));
                 }
 
                 LastEnd = RgEnd;
@@ -73,10 +121,10 @@ namespace Ryujinx.Graphics.Memory
 
             if (LastEnd < NewCached.End)
             {
-                CachedRanges.Add(new ValueRange<int>(LastEnd, NewCached.End, Mask));
+                CachedRanges.Add(new ValueRange<CachedResource>(LastEnd, NewCached.End, NewCachedValue));
             }
 
-            return Coverage != Size;
+            return !IsKeyCached || Coverage != Size;
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics/NvGpu.cs b/Ryujinx.Graphics/NvGpu.cs
index 4c6abd234c..6989be98a2 100644
--- a/Ryujinx.Graphics/NvGpu.cs
+++ b/Ryujinx.Graphics/NvGpu.cs
@@ -8,8 +8,9 @@ namespace Ryujinx.Graphics
 
         public GpuResourceManager ResourceManager { get; private set; }
 
-        public NvGpuFifo Fifo { get; private set; }
+        public DmaPusher Pusher { get; private set; }
 
+        internal NvGpuFifo       Fifo       { get; private set; }
         internal NvGpuEngine2d   Engine2d   { get; private set; }
         internal NvGpuEngine3d   Engine3d   { get; private set; }
         internal NvGpuEngineM2mf EngineM2mf { get; private set; }
@@ -21,8 +22,9 @@ namespace Ryujinx.Graphics
 
             ResourceManager = new GpuResourceManager(this);
 
-            Fifo = new NvGpuFifo(this);
+            Pusher = new DmaPusher(this);
 
+            Fifo       = new NvGpuFifo(this);
             Engine2d   = new NvGpuEngine2d(this);
             Engine3d   = new NvGpuEngine3d(this);
             EngineM2mf = new NvGpuEngineM2mf(this);
diff --git a/Ryujinx.Graphics/NvGpuEngine2d.cs b/Ryujinx.Graphics/NvGpuEngine2d.cs
index 711df1224c..f20f8d6eeb 100644
--- a/Ryujinx.Graphics/NvGpuEngine2d.cs
+++ b/Ryujinx.Graphics/NvGpuEngine2d.cs
@@ -1,11 +1,10 @@
 using Ryujinx.Graphics.Gal;
 using Ryujinx.Graphics.Memory;
 using Ryujinx.Graphics.Texture;
-using System;
 
 namespace Ryujinx.Graphics
 {
-    public class NvGpuEngine2d : INvGpuEngine
+    class NvGpuEngine2d : INvGpuEngine
     {
         private enum CopyOperation
         {
@@ -29,11 +28,11 @@ namespace Ryujinx.Graphics
             Registers = new int[0x238];
         }
 
-        public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            WriteRegister(PBEntry);
+            WriteRegister(MethCall);
 
-            if ((NvGpuEngine2dReg)PBEntry.Method == NvGpuEngine2dReg.BlitSrcYInt)
+            if ((NvGpuEngine2dReg)MethCall.Method == NvGpuEngine2dReg.BlitSrcYInt)
             {
                 TextureCopy(Vmm);
             }
@@ -43,6 +42,13 @@ namespace Ryujinx.Graphics
         {
             CopyOperation Operation = (CopyOperation)ReadRegister(NvGpuEngine2dReg.CopyOperation);
 
+            int  DstFormat = ReadRegister(NvGpuEngine2dReg.DstFormat);
+            bool DstLinear = ReadRegister(NvGpuEngine2dReg.DstLinear) != 0;
+            int  DstWidth  = ReadRegister(NvGpuEngine2dReg.DstWidth);
+            int  DstHeight = ReadRegister(NvGpuEngine2dReg.DstHeight);
+            int  DstPitch  = ReadRegister(NvGpuEngine2dReg.DstPitch);
+            int  DstBlkDim = ReadRegister(NvGpuEngine2dReg.DstBlockDimensions);
+
             int  SrcFormat = ReadRegister(NvGpuEngine2dReg.SrcFormat);
             bool SrcLinear = ReadRegister(NvGpuEngine2dReg.SrcLinear) != 0;
             int  SrcWidth  = ReadRegister(NvGpuEngine2dReg.SrcWidth);
@@ -50,12 +56,13 @@ namespace Ryujinx.Graphics
             int  SrcPitch  = ReadRegister(NvGpuEngine2dReg.SrcPitch);
             int  SrcBlkDim = ReadRegister(NvGpuEngine2dReg.SrcBlockDimensions);
 
-            int  DstFormat = ReadRegister(NvGpuEngine2dReg.DstFormat);
-            bool DstLinear = ReadRegister(NvGpuEngine2dReg.DstLinear) != 0;
-            int  DstWidth  = ReadRegister(NvGpuEngine2dReg.DstWidth);
-            int  DstHeight = ReadRegister(NvGpuEngine2dReg.DstHeight);
-            int  DstPitch  = ReadRegister(NvGpuEngine2dReg.DstPitch);
-            int  DstBlkDim = ReadRegister(NvGpuEngine2dReg.DstBlockDimensions);
+            int DstBlitX = ReadRegister(NvGpuEngine2dReg.BlitDstX);
+            int DstBlitY = ReadRegister(NvGpuEngine2dReg.BlitDstY);
+            int DstBlitW = ReadRegister(NvGpuEngine2dReg.BlitDstW);
+            int DstBlitH = ReadRegister(NvGpuEngine2dReg.BlitDstH);
+
+            int SrcBlitX = ReadRegister(NvGpuEngine2dReg.BlitSrcXInt);
+            int SrcBlitY = ReadRegister(NvGpuEngine2dReg.BlitSrcYInt);
 
             GalImageFormat SrcImgFormat = ImageUtils.ConvertSurface((GalSurfaceFormat)SrcFormat);
             GalImageFormat DstImgFormat = ImageUtils.ConvertSurface((GalSurfaceFormat)DstFormat);
@@ -86,23 +93,42 @@ namespace Ryujinx.Graphics
                 DstLayout,
                 DstImgFormat);
 
+            SrcTexture.Pitch = SrcPitch;
+            DstTexture.Pitch = DstPitch;
+
             Gpu.ResourceManager.SendTexture(Vmm, SrcKey, SrcTexture);
             Gpu.ResourceManager.SendTexture(Vmm, DstKey, DstTexture);
 
-            int Width  = Math.Min(SrcWidth,  DstWidth);
-            int Height = Math.Min(SrcHeight, DstHeight);
-
             Gpu.Renderer.RenderTarget.Copy(
                 SrcKey,
                 DstKey,
-                0,
-                0,
-                Width,
-                Height,
-                0,
-                0,
-                Width,
-                Height);
+                SrcBlitX,
+                SrcBlitY,
+                SrcBlitX + DstBlitW,
+                SrcBlitY + DstBlitH,
+                DstBlitX,
+                DstBlitY,
+                DstBlitX + DstBlitW,
+                DstBlitY + DstBlitH);
+
+            //Do a guest side copy as well. This is necessary when
+            //the texture is modified by the guest; however, it doesn't
+            //work when resources that the GPU can write to are copied,
+            //like framebuffers.
+            ImageUtils.CopyTexture(
+                Vmm,
+                SrcTexture,
+                DstTexture,
+                SrcAddress,
+                DstAddress,
+                SrcBlitX,
+                SrcBlitY,
+                DstBlitX,
+                DstBlitY,
+                DstBlitW,
+                DstBlitH);
+
+            Vmm.IsRegionModified(DstKey, ImageUtils.GetSize(DstTexture), NvGpuBufferType.Texture);
         }
 
         private static GalMemoryLayout GetLayout(bool Linear)
@@ -119,14 +145,9 @@ namespace Ryujinx.Graphics
                 (uint)Registers[(int)Reg + 1];
         }
 
-        private void WriteRegister(NvGpuPBEntry PBEntry)
+        private void WriteRegister(GpuMethodCall MethCall)
         {
-            int ArgsCount = PBEntry.Arguments.Count;
-
-            if (ArgsCount > 0)
-            {
-                Registers[PBEntry.Method] = PBEntry.Arguments[ArgsCount - 1];
-            }
+            Registers[MethCall.Method] = MethCall.Argument;
         }
 
         private int ReadRegister(NvGpuEngine2dReg Reg)
diff --git a/Ryujinx.Graphics/NvGpuEngine3d.cs b/Ryujinx.Graphics/NvGpuEngine3d.cs
index 0d748ce741..9fa428386c 100644
--- a/Ryujinx.Graphics/NvGpuEngine3d.cs
+++ b/Ryujinx.Graphics/NvGpuEngine3d.cs
@@ -7,7 +7,7 @@ using System.Collections.Generic;
 
 namespace Ryujinx.Graphics
 {
-    public class NvGpuEngine3d : INvGpuEngine
+    class NvGpuEngine3d : INvGpuEngine
     {
         public int[] Registers { get; private set; }
 
@@ -24,8 +24,6 @@ namespace Ryujinx.Graphics
 
         private ConstBuffer[][] ConstBuffers;
 
-        private List<long>[] UploadedKeys;
-
         private int CurrentInstance = 0;
 
         public NvGpuEngine3d(NvGpu Gpu)
@@ -59,13 +57,6 @@ namespace Ryujinx.Graphics
                 ConstBuffers[Index] = new ConstBuffer[18];
             }
 
-            UploadedKeys = new List<long>[(int)NvGpuBufferType.Count];
-
-            for (int i = 0; i < UploadedKeys.Length; i++)
-            {
-                UploadedKeys[i] = new List<long>();
-            }
-
             //Ensure that all components are enabled by default.
             //FIXME: Is this correct?
             WriteRegister(NvGpuEngine3dReg.ColorMaskN, 0x1111);
@@ -81,27 +72,19 @@ namespace Ryujinx.Graphics
             }
         }
 
-        public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            if (Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Method))
+            if (Methods.TryGetValue(MethCall.Method, out NvGpuMethod Method))
             {
-                Method(Vmm, PBEntry);
+                Method(Vmm, MethCall);
             }
             else
             {
-                WriteRegister(PBEntry);
+                WriteRegister(MethCall);
             }
         }
 
-        public void ResetCache()
-        {
-            foreach (List<long> Uploaded in UploadedKeys)
-            {
-                Uploaded.Clear();
-            }
-        }
-
-        private void VertexEndGl(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void VertexEndGl(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
             LockCaches();
 
@@ -152,13 +135,11 @@ namespace Ryujinx.Graphics
             Gpu.Renderer.Texture.UnlockCache();
         }
 
-        private void ClearBuffers(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void ClearBuffers(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            int Arg0 = PBEntry.Arguments[0];
+            int Attachment = (MethCall.Argument >> 6) & 0xf;
 
-            int Attachment = (Arg0 >> 6) & 0xf;
-
-            GalClearBufferFlags Flags = (GalClearBufferFlags)(Arg0 & 0x3f);
+            GalClearBufferFlags Flags = (GalClearBufferFlags)(MethCall.Argument & 0x3f);
 
             float Red   = ReadRegisterFloat(NvGpuEngine3dReg.ClearNColor + 0);
             float Green = ReadRegisterFloat(NvGpuEngine3dReg.ClearNColor + 1);
@@ -234,6 +215,15 @@ namespace Ryujinx.Graphics
 
             State.FlipX = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleX);
             State.FlipY = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleY);
+
+            int ScreenYControl = ReadRegister(NvGpuEngine3dReg.ScreenYControl);
+
+            bool NegateY = (ScreenYControl & 1) != 0;
+
+            if (NegateY)
+            {
+                State.FlipY = -State.FlipY;
+            }
         }
 
         private void SetZeta(NvGpuVmm Vmm)
@@ -566,8 +556,11 @@ namespace Ryujinx.Graphics
                 return;
             }
 
+            bool LinkedTsc = ReadRegisterBool(NvGpuEngine3dReg.LinkedTsc);
+
             int TicIndex = (TextureHandle >>  0) & 0xfffff;
-            int TscIndex = (TextureHandle >> 20) & 0xfff;
+
+            int TscIndex = LinkedTsc ? TicIndex : (TextureHandle >> 20) & 0xfff;
 
             long TicPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexHeaderPoolOffset);
             long TscPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexSamplerPoolOffset);
@@ -618,7 +611,7 @@ namespace Ryujinx.Graphics
 
                     long Key = Vmm.GetPhysicalAddress(Cb.Position);
 
-                    if (QueryKeyUpload(Vmm, Key, Cb.Size, NvGpuBufferType.ConstBuffer))
+                    if (Gpu.ResourceManager.MemoryRegionModified(Vmm, Key, Cb.Size, NvGpuBufferType.ConstBuffer))
                     {
                         IntPtr Source = Vmm.GetHostAddress(Cb.Position, Cb.Size);
 
@@ -661,7 +654,7 @@ namespace Ryujinx.Graphics
                     PrimType == GalPrimitiveType.Quads ||
                     PrimType == GalPrimitiveType.QuadStrip;
 
-                if (!IboCached || QueryKeyUpload(Vmm, IboKey, (uint)IbSize, NvGpuBufferType.Index))
+                if (!IboCached || Gpu.ResourceManager.MemoryRegionModified(Vmm, IboKey, (uint)IbSize, NvGpuBufferType.Index))
                 {
                     if (!UsesLegacyQuads)
                     {
@@ -778,7 +771,7 @@ namespace Ryujinx.Graphics
 
                 bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize);
 
-                if (!VboCached || QueryKeyUpload(Vmm, VboKey, VbSize, NvGpuBufferType.Vertex))
+                if (!VboCached || Gpu.ResourceManager.MemoryRegionModified(Vmm, VboKey, VbSize, NvGpuBufferType.Vertex))
                 {
                     IntPtr DataAddress = Vmm.GetHostAddress(VertexPosition, VbSize);
 
@@ -877,9 +870,9 @@ namespace Ryujinx.Graphics
             WriteCounterAndTimestamp
         }
 
-        private void QueryControl(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void QueryControl(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            WriteRegister(PBEntry);
+            WriteRegister(MethCall);
 
             long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.QueryAddress);
 
@@ -909,29 +902,24 @@ namespace Ryujinx.Graphics
             }
         }
 
-        private void CbData(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void CbData(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
             long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress);
 
             int Offset = ReadRegister(NvGpuEngine3dReg.ConstBufferOffset);
 
-            foreach (int Arg in PBEntry.Arguments)
-            {
-                Vmm.WriteInt32(Position + Offset, Arg);
+            Vmm.WriteInt32(Position + Offset, MethCall.Argument);
 
-                Offset += 4;
-            }
+            WriteRegister(NvGpuEngine3dReg.ConstBufferOffset, Offset + 4);
 
-            WriteRegister(NvGpuEngine3dReg.ConstBufferOffset, Offset);
-
-            UploadedKeys[(int)NvGpuBufferType.ConstBuffer].Clear();
+            Gpu.ResourceManager.ClearPbCache(NvGpuBufferType.ConstBuffer);
         }
 
-        private void CbBind(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void CbBind(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            int Stage = (PBEntry.Method - 0x904) >> 3;
+            int Stage = (MethCall.Method - 0x904) >> 3;
 
-            int Index = PBEntry.Arguments[0];
+            int Index = MethCall.Argument;
 
             bool Enabled = (Index & 1) != 0;
 
@@ -970,14 +958,9 @@ namespace Ryujinx.Graphics
                 (uint)Registers[(int)Reg + 1];
         }
 
-        private void WriteRegister(NvGpuPBEntry PBEntry)
+        private void WriteRegister(GpuMethodCall MethCall)
         {
-            int ArgsCount = PBEntry.Arguments.Count;
-
-            if (ArgsCount > 0)
-            {
-                Registers[PBEntry.Method] = PBEntry.Arguments[ArgsCount - 1];
-            }
+            Registers[MethCall.Method] = MethCall.Argument;
         }
 
         private int ReadRegister(NvGpuEngine3dReg Reg)
@@ -999,19 +982,5 @@ namespace Ryujinx.Graphics
         {
             Registers[(int)Reg] = Value;
         }
-
-        private bool QueryKeyUpload(NvGpuVmm Vmm, long Key, long Size, NvGpuBufferType Type)
-        {
-            List<long> Uploaded = UploadedKeys[(int)Type];
-
-            if (Uploaded.Contains(Key))
-            {
-                return false;
-            }
-
-            Uploaded.Add(Key);
-
-            return Vmm.IsRegionModified(Key, Size, Type);
-        }
     }
 }
diff --git a/Ryujinx.Graphics/NvGpuEngine3dReg.cs b/Ryujinx.Graphics/NvGpuEngine3dReg.cs
index bd61602bfd..c229e6c290 100644
--- a/Ryujinx.Graphics/NvGpuEngine3dReg.cs
+++ b/Ryujinx.Graphics/NvGpuEngine3dReg.cs
@@ -36,6 +36,7 @@ namespace Ryujinx.Graphics
         ZetaHoriz            = 0x48a,
         ZetaVert             = 0x48b,
         ZetaArrayMode        = 0x48c,
+        LinkedTsc            = 0x48d,
         DepthTestEnable      = 0x4b3,
         BlendIndependent     = 0x4b9,
         DepthWriteEnable     = 0x4ba,
@@ -57,6 +58,7 @@ namespace Ryujinx.Graphics
         StencilFrontFuncRef  = 0x4e5,
         StencilFrontFuncMask = 0x4e6,
         StencilFrontMask     = 0x4e7,
+        ScreenYControl       = 0x4eb,
         VertexArrayElemBase  = 0x50d,
         VertexArrayInstBase  = 0x50e,
         ZetaEnable           = 0x54e,
diff --git a/Ryujinx.Graphics/NvGpuEngineM2mf.cs b/Ryujinx.Graphics/NvGpuEngineM2mf.cs
index d612128066..5ee18ea9e8 100644
--- a/Ryujinx.Graphics/NvGpuEngineM2mf.cs
+++ b/Ryujinx.Graphics/NvGpuEngineM2mf.cs
@@ -4,7 +4,7 @@ using System.Collections.Generic;
 
 namespace Ryujinx.Graphics
 {
-    public class NvGpuEngineM2mf : INvGpuEngine
+    class NvGpuEngineM2mf : INvGpuEngine
     {
         public int[] Registers { get; private set; }
 
@@ -33,22 +33,22 @@ namespace Ryujinx.Graphics
             AddMethod(0xc0, 1, 1, Execute);
         }
 
-        public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            if (Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Method))
+            if (Methods.TryGetValue(MethCall.Method, out NvGpuMethod Method))
             {
-                Method(Vmm, PBEntry);
+                Method(Vmm, MethCall);
             }
             else
             {
-                WriteRegister(PBEntry);
+                WriteRegister(MethCall);
             }
         }
 
-        private void Execute(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void Execute(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
             //TODO: Some registers and copy modes are still not implemented.
-            int Control = PBEntry.Arguments[0];
+            int Control = MethCall.Argument;
 
             bool SrcLinear = ((Control >> 7) & 1) != 0;
             bool DstLinear = ((Control >> 8) & 1) != 0;
@@ -169,14 +169,9 @@ namespace Ryujinx.Graphics
                 (uint)Registers[(int)Reg + 1];
         }
 
-        private void WriteRegister(NvGpuPBEntry PBEntry)
+        private void WriteRegister(GpuMethodCall MethCall)
         {
-            int ArgsCount = PBEntry.Arguments.Count;
-
-            if (ArgsCount > 0)
-            {
-                Registers[PBEntry.Method] = PBEntry.Arguments[ArgsCount - 1];
-            }
+            Registers[MethCall.Method] = MethCall.Argument;
         }
 
         private int ReadRegister(NvGpuEngineM2mfReg Reg)
diff --git a/Ryujinx.Graphics/NvGpuEngineP2mf.cs b/Ryujinx.Graphics/NvGpuEngineP2mf.cs
index 842dfc5221..b111c59e32 100644
--- a/Ryujinx.Graphics/NvGpuEngineP2mf.cs
+++ b/Ryujinx.Graphics/NvGpuEngineP2mf.cs
@@ -1,10 +1,10 @@
 using Ryujinx.Graphics.Memory;
+using Ryujinx.Graphics.Texture;
 using System.Collections.Generic;
-using System.Collections.ObjectModel;
 
 namespace Ryujinx.Graphics
 {
-    public class NvGpuEngineP2mf : INvGpuEngine
+    class NvGpuEngineP2mf : INvGpuEngine
     {
         public int[] Registers { get; private set; }
 
@@ -12,7 +12,21 @@ namespace Ryujinx.Graphics
 
         private Dictionary<int, NvGpuMethod> Methods;
 
-        private ReadOnlyCollection<int> DataBuffer;
+        private int CopyStartX;
+        private int CopyStartY;
+
+        private int CopyWidth;
+        private int CopyHeight;
+        private int CopyGobBlockHeight;
+
+        private long CopyAddress;
+
+        private int CopyOffset;
+        private int CopySize;
+
+        private bool CopyLinear;
+
+        private byte[] Buffer;
 
         public NvGpuEngineP2mf(NvGpu Gpu)
         {
@@ -36,40 +50,90 @@ namespace Ryujinx.Graphics
             AddMethod(0x6d, 1, 1, PushData);
         }
 
-        public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            if (Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Method))
+            if (Methods.TryGetValue(MethCall.Method, out NvGpuMethod Method))
             {
-                Method(Vmm, PBEntry);
+                Method(Vmm, MethCall);
             }
             else
             {
-                WriteRegister(PBEntry);
+                WriteRegister(MethCall);
             }
         }
 
-        private void Execute(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void Execute(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
             //TODO: Some registers and copy modes are still not implemented.
-            int Control = PBEntry.Arguments[0];
+            int Control = MethCall.Argument;
 
             long DstAddress = MakeInt64From2xInt32(NvGpuEngineP2mfReg.DstAddress);
 
+            int DstPitch  = ReadRegister(NvGpuEngineP2mfReg.DstPitch);
+            int DstBlkDim = ReadRegister(NvGpuEngineP2mfReg.DstBlockDim);
+
+            int DstX = ReadRegister(NvGpuEngineP2mfReg.DstX);
+            int DstY = ReadRegister(NvGpuEngineP2mfReg.DstY);
+
+            int DstWidth  = ReadRegister(NvGpuEngineP2mfReg.DstWidth);
+            int DstHeight = ReadRegister(NvGpuEngineP2mfReg.DstHeight);
+
             int LineLengthIn = ReadRegister(NvGpuEngineP2mfReg.LineLengthIn);
+            int LineCount    = ReadRegister(NvGpuEngineP2mfReg.LineCount);
 
-            DataBuffer = null;
+            CopyLinear = (Control & 1) != 0;
 
-            Gpu.Fifo.Step();
+            CopyGobBlockHeight = 1 << ((DstBlkDim >> 4) & 0xf);
 
-            for (int Offset = 0; Offset < LineLengthIn; Offset += 4)
-            {
-                Vmm.WriteInt32(DstAddress + Offset, DataBuffer[Offset >> 2]);
-            }
+            CopyStartX = DstX;
+            CopyStartY = DstY;
+
+            CopyWidth  = DstWidth;
+            CopyHeight = DstHeight;
+
+            CopyAddress = DstAddress;
+
+            CopyOffset = 0;
+            CopySize   = LineLengthIn * LineCount;
+
+            Buffer = new byte[CopySize];
         }
 
-        private void PushData(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void PushData(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            DataBuffer = PBEntry.Arguments;
+            if (Buffer == null)
+            {
+                return;
+            }
+
+            for (int Shift = 0; Shift < 32 && CopyOffset < CopySize; Shift += 8, CopyOffset++)
+            {
+                Buffer[CopyOffset] = (byte)(MethCall.Argument >> Shift);
+            }
+
+            if (MethCall.IsLastCall)
+            {
+                if (CopyLinear)
+                {
+                    Vmm.WriteBytes(CopyAddress, Buffer);
+                }
+                else
+                {
+                    BlockLinearSwizzle Swizzle = new BlockLinearSwizzle(CopyWidth, 1, CopyGobBlockHeight);
+
+                    int SrcOffset = 0;
+
+                    for (int Y = CopyStartY; Y < CopyHeight && SrcOffset < CopySize; Y++)
+                    for (int X = CopyStartX; X < CopyWidth  && SrcOffset < CopySize; X++)
+                    {
+                        int DstOffset = Swizzle.GetSwizzleOffset(X, Y);
+
+                        Vmm.WriteByte(CopyAddress + DstOffset, Buffer[SrcOffset++]);
+                    }
+                }
+
+                Buffer = null;
+            }
         }
 
         private long MakeInt64From2xInt32(NvGpuEngineP2mfReg Reg)
@@ -79,14 +143,9 @@ namespace Ryujinx.Graphics
                 (uint)Registers[(int)Reg + 1];
         }
 
-        private void WriteRegister(NvGpuPBEntry PBEntry)
+        private void WriteRegister(GpuMethodCall MethCall)
         {
-            int ArgsCount = PBEntry.Arguments.Count;
-
-            if (ArgsCount > 0)
-            {
-                Registers[PBEntry.Method] = PBEntry.Arguments[ArgsCount - 1];
-            }
+            Registers[MethCall.Method] = MethCall.Argument;
         }
 
         private int ReadRegister(NvGpuEngineP2mfReg Reg)
diff --git a/Ryujinx.Graphics/NvGpuFifo.cs b/Ryujinx.Graphics/NvGpuFifo.cs
index 16d16f5edf..a8d1b36d17 100644
--- a/Ryujinx.Graphics/NvGpuFifo.cs
+++ b/Ryujinx.Graphics/NvGpuFifo.cs
@@ -1,10 +1,8 @@
 using Ryujinx.Graphics.Memory;
-using System.Collections.Concurrent;
-using System.Threading;
 
 namespace Ryujinx.Graphics
 {
-    public class NvGpuFifo
+    class NvGpuFifo
     {
         private const int MacrosCount    = 0x80;
         private const int MacroIndexMask = MacrosCount - 1;
@@ -15,33 +13,47 @@ namespace Ryujinx.Graphics
 
         private NvGpu Gpu;
 
-        private ConcurrentQueue<(NvGpuVmm, NvGpuPBEntry[])> BufferQueue;
-
         private NvGpuEngine[] SubChannels;
 
-        public AutoResetEvent Event { get; private set; }
-
         private struct CachedMacro
         {
             public int Position { get; private set; }
 
+            private bool ExecutionPending;
+            private int  Argument;
+
             private MacroInterpreter Interpreter;
 
             public CachedMacro(NvGpuFifo PFifo, INvGpuEngine Engine, int Position)
             {
                 this.Position = Position;
 
+                ExecutionPending = false;
+                Argument         = 0;
+
                 Interpreter = new MacroInterpreter(PFifo, Engine);
             }
 
-            public void PushParam(int Param)
+            public void StartExecution(int Argument)
             {
-                Interpreter?.Fifo.Enqueue(Param);
+                this.Argument = Argument;
+
+                ExecutionPending = true;
             }
 
-            public void Execute(NvGpuVmm Vmm, int[] Mme, int Param)
+            public void Execute(NvGpuVmm Vmm, int[] Mme)
             {
-                Interpreter?.Execute(Vmm, Mme, Position, Param);
+                if (ExecutionPending)
+                {
+                    ExecutionPending = false;
+
+                    Interpreter?.Execute(Vmm, Mme, Position, Argument);
+                }
+            }
+
+            public void PushArgument(int Argument)
+            {
+                Interpreter?.Fifo.Enqueue(Argument);
             }
         }
 
@@ -56,148 +68,109 @@ namespace Ryujinx.Graphics
         {
             this.Gpu = Gpu;
 
-            BufferQueue = new ConcurrentQueue<(NvGpuVmm, NvGpuPBEntry[])>();
-
             SubChannels = new NvGpuEngine[8];
 
             Macros = new CachedMacro[MacrosCount];
 
             Mme = new int[MmeWords];
-
-            Event = new AutoResetEvent(false);
         }
 
-        public void PushBuffer(NvGpuVmm Vmm, NvGpuPBEntry[] Buffer)
+        public void CallMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            BufferQueue.Enqueue((Vmm, Buffer));
-
-            Event.Set();
-        }
-
-        public void DispatchCalls()
-        {
-            while (Step());
-        }
-
-        private (NvGpuVmm Vmm, NvGpuPBEntry[] Pb) Curr;
-
-        private int CurrPbEntryIndex;
-
-        public bool Step()
-        {
-            while (Curr.Pb == null || Curr.Pb.Length <= CurrPbEntryIndex)
+            if ((NvGpuFifoMeth)MethCall.Method == NvGpuFifoMeth.BindChannel)
             {
-                if (!BufferQueue.TryDequeue(out Curr))
-                {
-                    return false;
-                }
+                NvGpuEngine Engine = (NvGpuEngine)MethCall.Argument;
 
-                Gpu.Engine3d.ResetCache();
-
-                Gpu.ResourceManager.ClearPbCache();
-
-                CurrPbEntryIndex = 0;
-            }
-
-            CallMethod(Curr.Vmm, Curr.Pb[CurrPbEntryIndex++]);
-
-            return true;
-        }
-
-        private void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
-        {
-            if ((NvGpuFifoMeth)PBEntry.Method == NvGpuFifoMeth.BindChannel)
-            {
-                NvGpuEngine Engine = (NvGpuEngine)PBEntry.Arguments[0];
-
-                SubChannels[PBEntry.SubChannel] = Engine;
+                SubChannels[MethCall.SubChannel] = Engine;
             }
             else
             {
-                switch (SubChannels[PBEntry.SubChannel])
+                switch (SubChannels[MethCall.SubChannel])
                 {
-                    case NvGpuEngine._2d:  Call2dMethod  (Vmm, PBEntry); break;
-                    case NvGpuEngine._3d:  Call3dMethod  (Vmm, PBEntry); break;
-                    case NvGpuEngine.P2mf: CallP2mfMethod(Vmm, PBEntry); break;
-                    case NvGpuEngine.M2mf: CallM2mfMethod(Vmm, PBEntry); break;
+                    case NvGpuEngine._2d:  Call2dMethod  (Vmm, MethCall); break;
+                    case NvGpuEngine._3d:  Call3dMethod  (Vmm, MethCall); break;
+                    case NvGpuEngine.P2mf: CallP2mfMethod(Vmm, MethCall); break;
+                    case NvGpuEngine.M2mf: CallM2mfMethod(Vmm, MethCall); break;
                 }
             }
         }
 
-        private void Call2dMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void Call2dMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            Gpu.Engine2d.CallMethod(Vmm, PBEntry);
+            Gpu.Engine2d.CallMethod(Vmm, MethCall);
         }
 
-        private void Call3dMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void Call3dMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            if (PBEntry.Method < 0x80)
+            if (MethCall.Method < 0x80)
             {
-                switch ((NvGpuFifoMeth)PBEntry.Method)
+                switch ((NvGpuFifoMeth)MethCall.Method)
                 {
                     case NvGpuFifoMeth.SetMacroUploadAddress:
                     {
-                        CurrMacroPosition = PBEntry.Arguments[0];
+                        CurrMacroPosition = MethCall.Argument;
 
                         break;
                     }
 
                     case NvGpuFifoMeth.SendMacroCodeData:
                     {
-                        foreach (int Arg in PBEntry.Arguments)
-                        {
-                            Mme[CurrMacroPosition++] = Arg;
-                        }
+                        Mme[CurrMacroPosition++] = MethCall.Argument;
+
                         break;
                     }
 
                     case NvGpuFifoMeth.SetMacroBindingIndex:
                     {
-                        CurrMacroBindIndex = PBEntry.Arguments[0];
+                        CurrMacroBindIndex = MethCall.Argument;
 
                         break;
                     }
 
                     case NvGpuFifoMeth.BindMacro:
                     {
-                        int Position = PBEntry.Arguments[0];
+                        int Position = MethCall.Argument;
 
                         Macros[CurrMacroBindIndex] = new CachedMacro(this, Gpu.Engine3d, Position);
 
                         break;
                     }
+
+                    default: CallP2mfMethod(Vmm, MethCall); break;
                 }
             }
-            else if (PBEntry.Method < 0xe00)
+            else if (MethCall.Method < 0xe00)
             {
-                Gpu.Engine3d.CallMethod(Vmm, PBEntry);
+                Gpu.Engine3d.CallMethod(Vmm, MethCall);
             }
             else
             {
-                int MacroIndex = (PBEntry.Method >> 1) & MacroIndexMask;
+                int MacroIndex = (MethCall.Method >> 1) & MacroIndexMask;
 
-                if ((PBEntry.Method & 1) != 0)
+                if ((MethCall.Method & 1) != 0)
                 {
-                    foreach (int Arg in PBEntry.Arguments)
-                    {
-                        Macros[MacroIndex].PushParam(Arg);
-                    }
+                    Macros[MacroIndex].PushArgument(MethCall.Argument);
                 }
                 else
                 {
-                    Macros[MacroIndex].Execute(Vmm, Mme, PBEntry.Arguments[0]);
+                    Macros[MacroIndex].StartExecution(MethCall.Argument);
+                }
+
+                if (MethCall.IsLastCall)
+                {
+                    Macros[MacroIndex].Execute(Vmm, Mme);
                 }
             }
         }
 
-        private void CallP2mfMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void CallP2mfMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            Gpu.EngineP2mf.CallMethod(Vmm, PBEntry);
+            Gpu.EngineP2mf.CallMethod(Vmm, MethCall);
         }
 
-        private void CallM2mfMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
+        private void CallM2mfMethod(NvGpuVmm Vmm, GpuMethodCall MethCall)
         {
-            Gpu.EngineM2mf.CallMethod(Vmm, PBEntry);
+            Gpu.EngineM2mf.CallMethod(Vmm, MethCall);
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics/NvGpuMethod.cs b/Ryujinx.Graphics/NvGpuMethod.cs
index 5babf2c32f..83f3312ae6 100644
--- a/Ryujinx.Graphics/NvGpuMethod.cs
+++ b/Ryujinx.Graphics/NvGpuMethod.cs
@@ -2,5 +2,5 @@ using Ryujinx.Graphics.Memory;
 
 namespace Ryujinx.Graphics
 {
-    delegate void NvGpuMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry);
+    delegate void NvGpuMethod(NvGpuVmm Vmm, GpuMethodCall MethCall);
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics/Texture/ImageUtils.cs b/Ryujinx.Graphics/Texture/ImageUtils.cs
index 661ed38dbe..89c29557e2 100644
--- a/Ryujinx.Graphics/Texture/ImageUtils.cs
+++ b/Ryujinx.Graphics/Texture/ImageUtils.cs
@@ -95,6 +95,7 @@ namespace Ryujinx.Graphics.Texture
             { GalImageFormat.RGBA32,      new ImageDescriptor(16, 1,  1,  TargetBuffer.Color) },
             { GalImageFormat.RGBA16,      new ImageDescriptor(8,  1,  1,  TargetBuffer.Color) },
             { GalImageFormat.RG32,        new ImageDescriptor(8,  1,  1,  TargetBuffer.Color) },
+            { GalImageFormat.RGBX8,       new ImageDescriptor(4,  1,  1,  TargetBuffer.Color) },
             { GalImageFormat.RGBA8,       new ImageDescriptor(4,  1,  1,  TargetBuffer.Color) },
             { GalImageFormat.BGRA8,       new ImageDescriptor(4,  1,  1,  TargetBuffer.Color) },
             { GalImageFormat.RGB10A2,     new ImageDescriptor(4,  1,  1,  TargetBuffer.Color) },
@@ -131,9 +132,10 @@ namespace Ryujinx.Graphics.Texture
             { GalImageFormat.Astc2D10x5,  new ImageDescriptor(16, 10, 5,  TargetBuffer.Color) },
             { GalImageFormat.Astc2D10x6,  new ImageDescriptor(16, 10, 6,  TargetBuffer.Color) },
 
+            { GalImageFormat.D16,   new ImageDescriptor(2, 1, 1, TargetBuffer.Depth)        },
+            { GalImageFormat.D24,   new ImageDescriptor(4, 1, 1, TargetBuffer.Depth)        },
             { GalImageFormat.D24S8, new ImageDescriptor(4, 1, 1, TargetBuffer.DepthStencil) },
             { GalImageFormat.D32,   new ImageDescriptor(4, 1, 1, TargetBuffer.Depth)        },
-            { GalImageFormat.D16,   new ImageDescriptor(2, 1, 1, TargetBuffer.Depth)        },
             { GalImageFormat.D32S8, new ImageDescriptor(8, 1, 1, TargetBuffer.DepthStencil) }
         };
 
@@ -198,6 +200,7 @@ namespace Ryujinx.Graphics.Texture
                 case GalSurfaceFormat.R8Uint:         return GalImageFormat.R8        | Uint;
                 case GalSurfaceFormat.B5G6R5Unorm:    return GalImageFormat.RGB565    | Unorm;
                 case GalSurfaceFormat.BGR5A1Unorm:    return GalImageFormat.BGR5A1    | Unorm;
+                case GalSurfaceFormat.RGBX8Unorm:     return GalImageFormat.RGBX8     | Unorm;
             }
 
             throw new NotImplementedException(Format.ToString());
@@ -210,6 +213,7 @@ namespace Ryujinx.Graphics.Texture
                 case GalZetaFormat.D32Float:      return GalImageFormat.D32   | Float;
                 case GalZetaFormat.S8D24Unorm:    return GalImageFormat.D24S8 | Unorm;
                 case GalZetaFormat.D16Unorm:      return GalImageFormat.D16   | Unorm;
+                case GalZetaFormat.D24X8Unorm:    return GalImageFormat.D24   | Unorm;
                 case GalZetaFormat.D24S8Unorm:    return GalImageFormat.D24S8 | Unorm;
                 case GalZetaFormat.D32S8X24Float: return GalImageFormat.D32S8 | Float;
             }
@@ -247,7 +251,7 @@ namespace Ryujinx.Graphics.Texture
             {
                 int OutOffs = Y * Pitch;
 
-                for (int X = 0; X < Width;  X++)
+                for (int X = 0; X < Width; X++)
                 {
                     long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y);
 
@@ -283,6 +287,45 @@ namespace Ryujinx.Graphics.Texture
             }
         }
 
+        public static bool CopyTexture( // Copies a Width x Height rectangle from SrcImage to DstImage through Vmm, one element per iteration.
+            NvGpuVmm Vmm,        // Memory accessor used for every ReadBytes/WriteBytes call below.
+            GalImage SrcImage,   // Source image; selects the source swizzle and supplies BytesPerPixel.
+            GalImage DstImage,   // Destination image; selects the destination swizzle.
+            long     SrcAddress, // Base address that source swizzle offsets are added to.
+            long     DstAddress, // Base address that destination swizzle offsets are added to.
+            int      SrcX,       // X origin of the rectangle in the source.
+            int      SrcY,       // Y origin of the rectangle in the source.
+            int      DstX,       // X origin of the rectangle in the destination.
+            int      DstY,       // Y origin of the rectangle in the destination.
+            int      Width,      // Number of columns to copy.
+            int      Height)     // Number of rows to copy.
+        { // Returns false (copying nothing) when the formats' BytesPerPixel differ; true once the whole rectangle was copied.
+            ISwizzle SrcSwizzle = TextureHelper.GetSwizzle(SrcImage);
+            ISwizzle DstSwizzle = TextureHelper.GetSwizzle(DstImage);
+
+            ImageDescriptor Desc = GetImageDescriptor(SrcImage.Format);
+            // Bail out before touching memory when source and destination disagree on BytesPerPixel.
+            if (GetImageDescriptor(DstImage.Format).BytesPerPixel != Desc.BytesPerPixel)
+            {
+                return false;
+            }
+
+            int BytesPerPixel = Desc.BytesPerPixel;
+            // The two loop headers share the single body below: rows outermost, columns innermost.
+            for (int Y = 0; Y < Height; Y++)
+            for (int X = 0; X < Width;  X++)
+            { // NOTE(review): coordinates are not range-checked here; presumably callers pass an in-bounds rectangle - confirm.
+                long SrcOffset = (uint)SrcSwizzle.GetSwizzleOffset(SrcX + X, SrcY + Y); // Via uint, so the long offset is never negative.
+                long DstOffset = (uint)DstSwizzle.GetSwizzleOffset(DstX + X, DstY + Y);
+                // Move BytesPerPixel bytes from the source offset to the destination offset.
+                byte[] Texel = Vmm.ReadBytes(SrcAddress + SrcOffset, BytesPerPixel);
+
+                Vmm.WriteBytes(DstAddress + DstOffset, Texel);
+            }
+
+            return true;
+        }
+
         public static int GetSize(GalImage Image)
         {
             ImageDescriptor Desc = GetImageDescriptor(Image.Format);
diff --git a/Ryujinx.Graphics/ValueRangeSet.cs b/Ryujinx.Graphics/ValueRangeSet.cs
index 479f41ed27..42125bcece 100644
--- a/Ryujinx.Graphics/ValueRangeSet.cs
+++ b/Ryujinx.Graphics/ValueRangeSet.cs
@@ -76,7 +76,7 @@ namespace Ryujinx.Graphics
                 {
                     Ranges.RemoveAt(NewIndex + 1);
 
-                    Ranges[NewIndex] = new ValueRange<T>(Range.Start, Next.End, Range.Value);
+                    Ranges[NewIndex] = new ValueRange<T>(Ranges[NewIndex].Start, Next.End, Range.Value);
                 }
             }
         }
diff --git a/Ryujinx.HLE/HOS/Horizon.cs b/Ryujinx.HLE/HOS/Horizon.cs
index af175bd469..1b336647fa 100644
--- a/Ryujinx.HLE/HOS/Horizon.cs
+++ b/Ryujinx.HLE/HOS/Horizon.cs
@@ -333,7 +333,7 @@ namespace Ryujinx.HLE.HOS
             {
                 Device.FileSystem.SetRomFs(RomfsStream);
             }
-            
+
             Pfs Exefs = new Pfs(ExefsStream);
 
             Npdm MetaData = null;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs
index 5443a3bf70..39f39d4569 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs
@@ -181,15 +181,7 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
 
         private static void PushGpfifo(ServiceCtx Context, NvGpuVmm Vmm, long Gpfifo)
         {
-            long VA = Gpfifo & 0xff_ffff_ffff;
-
-            int Size = (int)(Gpfifo >> 40) & 0x7ffffc;
-
-            byte[] Data = Vmm.ReadBytes(VA, Size);
-
-            NvGpuPBEntry[] PushBuffer = NvGpuPushBuffer.Decode(Data);
-
-            Context.Device.Gpu.Fifo.PushBuffer(Vmm, PushBuffer);
+            Context.Device.Gpu.Pusher.Push(Vmm, Gpfifo); // Passes the raw Gpfifo value on; the address/size split and decode no longer happen here.
         }
 
         public static NvChannel GetChannel(ServiceCtx Context, NvChannelName Channel)
diff --git a/Ryujinx.HLE/Switch.cs b/Ryujinx.HLE/Switch.cs
index fe0be6cef6..8de49ca4a8 100644
--- a/Ryujinx.HLE/Switch.cs
+++ b/Ryujinx.HLE/Switch.cs
@@ -88,12 +88,12 @@ namespace Ryujinx.HLE
 
         public bool WaitFifo()
         {
-            return Gpu.Fifo.Event.WaitOne(8);
+            return Gpu.Pusher.WaitForCommands(); // Forwards to the pusher; replaces the previous Fifo.Event.WaitOne(8) call.
         }
 
         public void ProcessFrame()
         {
-            Gpu.Fifo.DispatchCalls();
+            Gpu.Pusher.DispatchCalls(); // Dispatch is now requested from the pusher instead of from Gpu.Fifo directly.
         }
 
         internal void Unload()