diff --git a/Ryujinx.HLE/Gpu/MacroInterpreter.cs b/Ryujinx.HLE/Gpu/MacroInterpreter.cs
index 58a236ad36..c333046a98 100644
--- a/Ryujinx.HLE/Gpu/MacroInterpreter.cs
+++ b/Ryujinx.HLE/Gpu/MacroInterpreter.cs
@@ -56,7 +56,7 @@ namespace Ryujinx.HLE.Gpu
 
         private int PipeOp;
 
-        private long Pc;
+        private int Pc;
 
         public MacroInterpreter(NvGpuFifo PFifo, INvGpuEngine Engine)
         {
@@ -68,7 +68,7 @@ namespace Ryujinx.HLE.Gpu
             Gprs = new int[8];
         }
 
-        public void Execute(NvGpuVmm Vmm, long Position, int Param)
+        public void Execute(NvGpuVmm Vmm, int[] Mme, int Position, int Param)
         {
             Reset();
 
@@ -76,13 +76,13 @@ namespace Ryujinx.HLE.Gpu
 
             Pc = Position;
 
-            FetchOpCode(Vmm);
+            FetchOpCode(Mme);
 
-            while (Step(Vmm));
+            while (Step(Vmm, Mme));
 
             //Due to the delay slot, we still need to execute
             //one more instruction before we actually exit.
-            Step(Vmm);
+            Step(Vmm, Mme);
         }
 
         private void Reset()
@@ -98,11 +98,11 @@ namespace Ryujinx.HLE.Gpu
             Carry = false;
         }
 
-        private bool Step(NvGpuVmm Vmm)
+        private bool Step(NvGpuVmm Vmm, int[] Mme)
         {
-            long BaseAddr = Pc - 4;
+            int BaseAddr = Pc - 1;
 
-            FetchOpCode(Vmm);
+            FetchOpCode(Mme);
 
             if ((OpCode & 7) < 7)
             {
@@ -205,13 +205,13 @@ namespace Ryujinx.HLE.Gpu
 
                 if (Taken)
                 {
-                    Pc = BaseAddr + (GetImm() << 2);
+                    Pc = BaseAddr + GetImm();
 
                     bool NoDelays = (OpCode & 0x20) != 0;
 
                     if (NoDelays)
                     {
-                        FetchOpCode(Vmm);
+                        FetchOpCode(Mme);
                     }
 
                     return true;
@@ -223,13 +223,11 @@ namespace Ryujinx.HLE.Gpu
             return !Exit;
         }
 
-        private void FetchOpCode(NvGpuVmm Vmm)
+        private void FetchOpCode(int[] Mme)
         {
             OpCode = PipeOp;
 
-            PipeOp = Vmm.ReadInt32(Pc);
-
-            Pc += 4;
+            PipeOp = Mme[Pc++];
         }
 
         private int GetAluResult()
diff --git a/Ryujinx.HLE/Gpu/NvGpuEngineDma.cs b/Ryujinx.HLE/Gpu/NvGpuEngineDma.cs
index 48a19047bc..ed7819e960 100644
--- a/Ryujinx.HLE/Gpu/NvGpuEngineDma.cs
+++ b/Ryujinx.HLE/Gpu/NvGpuEngineDma.cs
@@ -1,4 +1,3 @@
-using Ryujinx.Graphics.Gal;
 using System.Collections.Generic;
 
 namespace Ryujinx.HLE.Gpu
diff --git a/Ryujinx.HLE/Gpu/NvGpuFifo.cs b/Ryujinx.HLE/Gpu/NvGpuFifo.cs
index f115e8db2f..361c8bcec4 100644
--- a/Ryujinx.HLE/Gpu/NvGpuFifo.cs
+++ b/Ryujinx.HLE/Gpu/NvGpuFifo.cs
@@ -7,6 +7,10 @@ namespace Ryujinx.HLE.Gpu
         private const int MacrosCount    = 0x80;
         private const int MacroIndexMask = MacrosCount - 1;
 
+        //Note: The size of the macro memory is unknown, so we guess
+        //256 KiB (64K 32-bit words) here. Increase MmeWords if needed.
+        private const int MmeWords = 256 * 256;
+
         private NvGpu Gpu;
 
         private ConcurrentQueue<(NvGpuVmm, NvGpuPBEntry)> BufferQueue;
@@ -15,11 +19,11 @@ namespace Ryujinx.HLE.Gpu
 
         private struct CachedMacro
         {
-            public long Position { get; private set; }
+            public int Position { get; private set; }
 
             private MacroInterpreter Interpreter;
 
-            public CachedMacro(NvGpuFifo PFifo, INvGpuEngine Engine, long Position)
+            public CachedMacro(NvGpuFifo PFifo, INvGpuEngine Engine, int Position)
             {
                 this.Position = Position;
 
@@ -31,17 +35,19 @@ namespace Ryujinx.HLE.Gpu
                 Interpreter?.Fifo.Enqueue(Param);
             }
 
-            public void Execute(NvGpuVmm Vmm, int Param)
+            public void Execute(NvGpuVmm Vmm, int[] Mme, int Param)
             {
-                Interpreter?.Execute(Vmm, Position, Param);
+                Interpreter?.Execute(Vmm, Mme, Position, Param);
             }
         }
 
-        private long CurrMacroPosition;
-        private int  CurrMacroBindIndex;
+        private int CurrMacroPosition;
+        private int CurrMacroBindIndex;
 
         private CachedMacro[] Macros;
 
+        private int[] Mme;
+
         public NvGpuFifo(NvGpu Gpu)
         {
             this.Gpu = Gpu;
@@ -51,6 +57,8 @@ namespace Ryujinx.HLE.Gpu
             SubChannels = new NvGpuEngine[8];
 
             Macros = new CachedMacro[MacrosCount];
+
+            Mme = new int[MmeWords];
         }
 
         public void PushBuffer(NvGpuVmm Vmm, NvGpuPBEntry[] Buffer)
@@ -95,22 +103,16 @@ namespace Ryujinx.HLE.Gpu
 
                     case NvGpuFifoMeth.SetMacroUploadAddress:
                     {
-                        CurrMacroPosition = (long)((ulong)PBEntry.Arguments[0] << 2);
+                        CurrMacroPosition = PBEntry.Arguments[0];
 
                         break;
                     }
 
                     case NvGpuFifoMeth.SendMacroCodeData:
                     {
-                        long Position = CurrMacroPosition;
-
                         foreach (int Arg in PBEntry.Arguments)
                         {
-                            Vmm.WriteInt32(Position, Arg);
-
-                            CurrMacroPosition += 4;
-
-                            Position += 4;
+                            Mme[CurrMacroPosition++] = Arg;
                         }
                         break;
                     }
@@ -124,7 +126,7 @@ namespace Ryujinx.HLE.Gpu
 
                     case NvGpuFifoMeth.BindMacro:
                     {
-                        long Position = (long)((ulong)PBEntry.Arguments[0] << 2);
+                        int Position = PBEntry.Arguments[0];
 
                         Macros[CurrMacroBindIndex] = new CachedMacro(this, Gpu.Engine3d, Position);
 
@@ -167,7 +169,7 @@ namespace Ryujinx.HLE.Gpu
                 }
                 else
                 {
-                    Macros[MacroIndex].Execute(Vmm, PBEntry.Arguments[0]);
+                    Macros[MacroIndex].Execute(Vmm, Mme, PBEntry.Arguments[0]);
                 }
             }
         }