diff --git a/Ryujinx.Graphics/Gal/GalPipelineState.cs b/Ryujinx.Graphics/Gal/GalPipelineState.cs
index d1ffbe76de..7c66951415 100644
--- a/Ryujinx.Graphics/Gal/GalPipelineState.cs
+++ b/Ryujinx.Graphics/Gal/GalPipelineState.cs
@@ -7,6 +7,8 @@
         public bool Enabled;
         public int Stride;
         public long VboKey;
+        public bool Instanced; //Bit 0 of this binding's VertexArrayNInstance register
+        public int Divisor; //Raw value of this binding's VertexArrayNDivisor register
         public GalVertexAttrib[] Attribs;
     }
 
@@ -22,6 +24,8 @@
         public float FlipX;
         public float FlipY;
 
+        public int Instance; //Current instance index, uploaded to the vertex shader Extra block when it changes
+
         public GalFrontFace FrontFace;
 
         public bool CullFaceEnabled;
diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs
index 5828921d44..051b105048 100644
--- a/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs
@@ -126,9 +126,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
             BindVertexLayout(New);
 
-            if (New.FlipX != Old.FlipX || New.FlipY != Old.FlipY)
+            if (New.FlipX != Old.FlipX || New.FlipY != Old.FlipY || New.Instance != Old.Instance)
             {
-                Shader.SetFlip(New.FlipX, New.FlipY);
+                Shader.SetExtraData(New.FlipX, New.FlipY, New.Instance);
             }
 
             //Note: Uncomment SetFrontFace and SetCullFace when flipping issues are solved
@@ -290,8 +290,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
         private void BindConstBuffers(GalPipelineState New)
         {
-            //Index 0 is reserved
-            int FreeBinding = 1;
+            int FreeBinding = OGLShader.ReservedCbufCount;
 
             void BindIfNotNull(OGLShaderStage Stage)
             {
@@ -385,6 +384,15 @@ namespace Ryujinx.Graphics.Gal.OpenGL
                     {
                         GL.VertexAttribPointer(Attrib.Index, Size, Type, Normalize, Binding.Stride, Offset);
                     }
+
+                    if (!Binding.Instanced || Binding.Divisor == 0)
+                    {
+                        GL.VertexAttribDivisor(Attrib.Index, 0); //Attribute advances per vertex
+                    }
+                    else
+                    {
+                        GL.VertexAttribDivisor(Attrib.Index, 1); //Attribute advances per instance
+                    }
                 }
             }
         }
diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs
index 0108a0da21..73d37b8791 100644
--- a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs
@@ -9,6 +9,10 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 {
     class OGLShader : IGalShader
     {
+        public const int ReservedCbufCount = 1; //Uniform buffer binding 0 is taken by the Extra block
+
+        private const int ExtraDataSize = 4; //Size of the Extra block buffer, counted in floats
+
         public OGLShaderProgram Current;
 
         private ConcurrentDictionary<long, OGLShaderStage> Stages;
@@ -96,7 +100,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
             return Enumerable.Empty<ShaderDeclInfo>();
         }
 
-        public unsafe void SetFlip(float X, float Y)
+        public unsafe void SetExtraData(float FlipX, float FlipY, int Instance)
         {
             BindProgram();
 
@@ -104,14 +108,15 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
             GL.BindBuffer(BufferTarget.UniformBuffer, ExtraUboHandle);
 
-            float* Data = stackalloc float[4];
-            Data[0] = X;
-            Data[1] = Y;
+            float* Data = stackalloc float[ExtraDataSize]; //[flip_x, flip_y, instance, unused]
+            Data[0] = FlipX;
+            Data[1] = FlipY;
+            Data[2] = BitConverter.Int32BitsToSingle(Instance); //Keeps the raw int bits, the shader declares this word as an int
 
             //Invalidate buffer
-            GL.BufferData(BufferTarget.UniformBuffer, 4 * sizeof(float), IntPtr.Zero, BufferUsageHint.StreamDraw);
+            GL.BufferData(BufferTarget.UniformBuffer, ExtraDataSize * sizeof(float), IntPtr.Zero, BufferUsageHint.StreamDraw);
 
-            GL.BufferSubData(BufferTarget.UniformBuffer, IntPtr.Zero, 4 * sizeof(float), (IntPtr)Data);
+            GL.BufferSubData(BufferTarget.UniformBuffer, IntPtr.Zero, ExtraDataSize * sizeof(float), (IntPtr)Data);
         }
 
         public void Bind(long Key)
@@ -197,7 +202,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
                 GL.BindBuffer(BufferTarget.UniformBuffer, ExtraUboHandle);
 
-                GL.BufferData(BufferTarget.UniformBuffer, 4 * sizeof(float), IntPtr.Zero, BufferUsageHint.StreamDraw);
+                GL.BufferData(BufferTarget.UniformBuffer, ExtraDataSize * sizeof(float), IntPtr.Zero, BufferUsageHint.StreamDraw);
 
                 GL.BindBufferBase(BufferRangeTarget.UniformBuffer, 0, ExtraUboHandle);
             }
@@ -219,8 +224,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
 
             GL.UniformBlockBinding(ProgramHandle, ExtraBlockindex, 0);
 
-            //First index is reserved
-            int FreeBinding = 1;
+            int FreeBinding = ReservedCbufCount;
 
             void BindUniformBlocksIfNotNull(OGLShaderStage Stage)
             {
diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs
index 25f64db83d..c22a282dcc 100644
--- a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs
+++ b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs
@@ -41,6 +41,7 @@ namespace Ryujinx.Graphics.Gal.Shader
 
         public const string ExtraUniformBlockName = "Extra";
         public const string FlipUniformName = "flip";
+        public const string InstanceUniformName = "instance"; //Int member of the Extra block, emitted in place of gl_InstanceID
 
         public const string ProgramName  = "program";
         public const string ProgramAName = ProgramName + "_a";
diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs
index 8baf30e039..984684f16b 100644
--- a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs
+++ b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs
@@ -241,10 +241,15 @@ namespace Ryujinx.Graphics.Gal.Shader
         {
             if (Decl.ShaderType == GalShaderType.Vertex)
             {
-                SB.AppendLine("layout (std140) uniform " + GlslDecl.ExtraUniformBlockName + "{");
+                //Memory layout here is [flip_x, flip_y, instance, unused]
+                //That is 4 words of 4 bytes each (16 bytes), the last word is unused
+
+                SB.AppendLine("layout (std140) uniform " + GlslDecl.ExtraUniformBlockName + " {");
 
                 SB.AppendLine(IdentationStr + "vec2 " + GlslDecl.FlipUniformName + ";");
 
+                SB.AppendLine(IdentationStr + "int " + GlslDecl.InstanceUniformName + ";");
+
                 SB.AppendLine("};");
             }
 
@@ -816,7 +821,7 @@ namespace Ryujinx.Graphics.Gal.Shader
                 switch (Abuf.Offs)
                 {
                     case GlslDecl.VertexIdAttr:   return "gl_VertexID";
-                    case GlslDecl.InstanceIdAttr: return "gl_InstanceID";
+                    case GlslDecl.InstanceIdAttr: return GlslDecl.InstanceUniformName; //Extra block member, not gl_InstanceID
                 }
             }
             else if (Decl.ShaderType == GalShaderType.TessEvaluation)
diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
index be47b66ca0..2010e43bd2 100644
--- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
+++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
@@ -27,6 +27,8 @@ namespace Ryujinx.HLE.Gpu.Engines
 
         private List<long>[] UploadedKeys;
 
+        private int CurrentInstance = 0; //Instance index kept across draws, driven by PrimCtrl bits 26 and 27
+
         public NvGpuEngine3d(NvGpu Gpu)
         {
             this.Gpu = Gpu;
@@ -654,10 +656,25 @@ namespace Ryujinx.HLE.Gpu.Engines
                 long VertexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNAddress + Index * 4);
                 long VertexEndPos   = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNEndAddr + Index * 2);
 
-                long VboKey = Vmm.GetPhysicalAddress(VertexPosition);
+                int VertexDivisor = ReadRegister(NvGpuEngine3dReg.VertexArrayNDivisor + Index * 4);
+
+                bool Instanced = (ReadRegister(NvGpuEngine3dReg.VertexArrayNInstance + Index) & 1) != 0;
 
                 int Stride = Control & 0xfff;
 
+                if (Instanced && VertexDivisor != 0)
+                {
+                    VertexPosition += (long)Stride * (CurrentInstance / VertexDivisor); //64-bit math, an int product could wrap
+                }
+
+                if (VertexPosition > VertexEndPos)
+                {
+                    //Start address is past the end of this vertex array, skip this binding only
+                    continue;
+                }
+
+                long VboKey = Vmm.GetPhysicalAddress(VertexPosition);
+
                 long VbSize = (VertexEndPos - VertexPosition) + 1;
 
                 bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize);
@@ -669,10 +686,12 @@ namespace Ryujinx.HLE.Gpu.Engines
                     Gpu.Renderer.Rasterizer.CreateVbo(VboKey, (int)VbSize, DataAddress);
                 }
 
-                State.VertexBindings[Index].Enabled = true;
-                State.VertexBindings[Index].Stride  = Stride;
-                State.VertexBindings[Index].VboKey  = VboKey;
-                State.VertexBindings[Index].Attribs = Attribs[Index].ToArray();
+                State.VertexBindings[Index].Enabled   = true;
+                State.VertexBindings[Index].Stride    = Stride;
+                State.VertexBindings[Index].VboKey    = VboKey;
+                State.VertexBindings[Index].Instanced = Instanced;
+                State.VertexBindings[Index].Divisor   = VertexDivisor;
+                State.VertexBindings[Index].Attribs   = Attribs[Index].ToArray();
             }
         }
 
@@ -683,6 +702,25 @@ namespace Ryujinx.HLE.Gpu.Engines
 
             GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff);
 
+            bool InstanceNext = ((PrimCtrl >> 26) & 1) != 0; //Advance to the next instance
+            bool InstanceCont = ((PrimCtrl >> 27) & 1) != 0; //Keep the current instance
+
+            if (InstanceNext && InstanceCont)
+            {
+                throw new InvalidOperationException("GPU tried to increase and keep instance count at the same time");
+            }
+
+            if (InstanceNext)
+            {
+                CurrentInstance++;
+            }
+            else if (!InstanceCont)
+            {
+                CurrentInstance = 0; //Neither bit set, counting restarts from the first instance
+            }
+
+            State.Instance = CurrentInstance;
+
             Gpu.Renderer.Pipeline.Bind(State);
 
             if (IndexCount != 0)
diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs
index 7eff13b58f..ace324e91d 100644
--- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs
+++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs
@@ -53,6 +53,7 @@ namespace Ryujinx.HLE.Gpu.Engines
         StencilFrontFuncMask = 0x4e6,
         StencilFrontMask     = 0x4e7,
         VertexArrayElemBase  = 0x50d,
+        VertexArrayInstBase  = 0x50e,
         ZetaEnable           = 0x54e,
         TexHeaderPoolOffset  = 0x55d,
         TexSamplerPoolOffset = 0x557,
@@ -70,6 +71,7 @@ namespace Ryujinx.HLE.Gpu.Engines
         IndexArrayFormat     = 0x5f6,
         IndexBatchFirst      = 0x5f7,
         IndexBatchCount      = 0x5f8,
+        VertexArrayNInstance = 0x620, //One register per vertex array, bit 0 enables instancing
         CullFaceEnable       = 0x646,
         FrontFace            = 0x647,
         CullFace             = 0x648,