diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index acd656210d..4c6224e3f9 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <summary>
         /// Version of the codegen (to be changed when codegen or guest format change).
         /// </summary>
-        private const ulong ShaderCodeGenVersion = 2764;
+        private const ulong ShaderCodeGenVersion = 2972;
 
         // Progress reporting helpers
         private volatile int _shaderCount;
diff --git a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
index 80d2cb4af1..8820527f13 100644
--- a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
@@ -95,7 +95,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
                         if (currBlock.OpCodes.Count != 0)
                         {
                             // We should have blocks for all possible branch targets,
-                            // including those from SSY/PBK instructions.
+                            // including those from PBK/PCNT/SSY instructions.
                             foreach (PushOpInfo pushOp in currBlock.PushOpCodes)
                             {
                                 GetBlock(pushOp.Op.GetAbsoluteAddress());
@@ -243,7 +243,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
                 {
                     SetUserAttributeUses(config, op.Name, opCode);
                 }
-                else if (op.Name == InstName.Ssy || op.Name == InstName.Pbk)
+                else if (op.Name == InstName.Pbk || op.Name == InstName.Pcnt || op.Name == InstName.Ssy)
                 {
                     block.AddPushOp(op);
                 }
@@ -512,8 +512,9 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
         private enum MergeType
         {
-            Brk = 0,
-            Sync = 1
+            Brk,  // Stack entry pushed by PBK and popped by BRK.
+            Cont, // Stack entry pushed by PCNT and popped by CONT.
+            Sync  // Stack entry pushed by SSY and popped by SYNC.
         }
 
         private struct PathBlockState
@@ -629,7 +630,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
                     for (int index = pushOpIndex; index < pushOpsCount; index++)
                     {
                         InstOp currentPushOp = current.PushOpCodes[index].Op;
-                        MergeType pushMergeType = currentPushOp.Name == InstName.Ssy ? MergeType.Sync : MergeType.Brk;
+                        MergeType pushMergeType = GetMergeTypeFromPush(currentPushOp.Name);
                         branchStack.Push((currentPushOp.GetAbsoluteAddress(), pushMergeType));
                     }
                 }
@@ -643,9 +644,9 @@ namespace Ryujinx.Graphics.Shader.Decoders
                 }
 
                 InstOp lastOp = current.GetLastOp();
-                if (lastOp.Name == InstName.Sync || lastOp.Name == InstName.Brk)
+                if (IsPopBranch(lastOp.Name))
                 {
-                    MergeType popMergeType = lastOp.Name == InstName.Sync ? MergeType.Sync : MergeType.Brk;
+                    MergeType popMergeType = GetMergeTypeFromPop(lastOp.Name);
 
                     bool found = true;
                     ulong targetAddress = 0UL;
@@ -662,7 +663,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
                         (targetAddress, mergeType) = branchStack.Pop();
 
                         // Push the target address (this will be used to push the address
-                        // back into the SSY/PBK stack when we return from that block),
+                        // back into the PBK/PCNT/SSY stack when we return from that block),
                         Push(new PathBlockState(targetAddress, mergeType));
                     }
                     while (mergeType != popMergeType);
@@ -705,5 +706,30 @@ namespace Ryujinx.Graphics.Shader.Decoders
                 }
             }
         }
+
+        /// <summary>
+        /// Checks if the instruction is BRK, CONT or SYNC, which branch to an address taken from the branch stack.
+        /// </summary>
+        public static bool IsPopBranch(InstName name) => name == InstName.Sync || name == InstName.Cont || name == InstName.Brk;
+
+        /// <summary>
+        /// Gets the merge type of the branch stack entry created by a push instruction.
+        /// </summary>
+        /// <param name="name">Push instruction name, anything other than Pbk or Pcnt is handled as Ssy</param>
+        /// <returns>Brk for Pbk, Cont for Pcnt, Sync otherwise</returns>
+        private static MergeType GetMergeTypeFromPush(InstName name)
+        {
+            return name == InstName.Pbk ? MergeType.Brk : (name == InstName.Pcnt ? MergeType.Cont : MergeType.Sync);
+        }
+
+        /// <summary>
+        /// Gets the merge type of the branch stack entry that a pop instruction is matched against.
+        /// </summary>
+        /// <param name="name">Pop instruction name, anything other than Brk or Cont is handled as Sync</param>
+        /// <returns>Brk for Brk, Cont for Cont, Sync otherwise</returns>
+        private static MergeType GetMergeTypeFromPop(InstName name)
+        {
+            return name == InstName.Brk ? MergeType.Brk : (name == InstName.Cont ? MergeType.Cont : MergeType.Sync);
+        }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs b/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs
index 397e327e1f..b7a0caf10d 100644
--- a/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs
@@ -1960,7 +1960,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
         public int Dest => (int)((_opcode >> 0) & 0xFF);
         public int SrcA => (int)((_opcode >> 8) & 0xFF);
         public int Imm32 => (int)(_opcode >> 20);
-        public int SrcC => (int)((_opcode >> 39) & 0xFF);
         public int Pred => (int)((_opcode >> 16) & 0x7);
         public bool PredInv => (_opcode & 0x80000) != 0;
         public bool NegC => (_opcode & 0x200000000000000) != 0;
@@ -2460,7 +2459,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
         public int Dest => (int)((_opcode >> 0) & 0xFF);
         public int SrcA => (int)((_opcode >> 8) & 0xFF);
         public int Imm => (int)(_opcode >> 20);
-        public int SrcC => (int)((_opcode >> 39) & 0xFF);
         public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
         public int Pred => (int)((_opcode >> 16) & 0x7);
         public bool PredInv => (_opcode & 0x80000) != 0;
diff --git a/Ryujinx.Graphics.Shader/Decoders/InstTable.cs b/Ryujinx.Graphics.Shader/Decoders/InstTable.cs
index 2d91f21a08..eb3d6f3d20 100644
--- a/Ryujinx.Graphics.Shader/Decoders/InstTable.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/InstTable.cs
@@ -55,7 +55,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
             Add("1110111110000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctll,       InstEmit.Cctll,       InstProps.Ra);
             Add("1110101111110xx0000000000000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctlt,       InstEmit.Cctlt);
             Add("1110101111101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctlt,       InstEmit.Cctlt,       InstProps.Rc);
-            Add("111000110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cont,        InstEmit.Cont);
+            Add("111000110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cont,        InstEmit.Cont,        InstProps.Bra);
             Add("0101000010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cset,        InstEmit.Cset,        InstProps.Rd  | InstProps.Ps);
             Add("0101000010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Csetp,       InstEmit.Csetp,       InstProps.Pd  | InstProps.Pdn | InstProps.Ps);
             Add("0101000011001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cs2r,        InstEmit.Cs2r,        InstProps.Rd);
@@ -101,7 +101,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
             Add("0011001x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma,        InstEmit.FfmaI,       InstProps.Rd  | InstProps.Ra  | InstProps.Ib  | InstProps.Rc);
             Add("010010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma,        InstEmit.FfmaC,       InstProps.Rd  | InstProps.Ra  | InstProps.Rc);
             Add("010100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma,        InstEmit.FfmaRc,      InstProps.Rd  | InstProps.Ra  | InstProps.Rc);
-            Add("000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma32i,     InstEmit.Ffma32i,     InstProps.Rd  | InstProps.Ra  | InstProps.Rc);
+            Add("000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma32i,     InstEmit.Ffma32i,     InstProps.Rd  | InstProps.Ra);
             Add("0101110000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo,         InstEmit.FloR,        InstProps.Rd  | InstProps.Rb);
             Add("0011100x00110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo,         InstEmit.FloI,        InstProps.Rd  | InstProps.Ib);
             Add("0100110000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo,         InstEmit.FloC,        InstProps.Rd);
@@ -129,6 +129,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
             Add("01110xxx0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2,       InstEmit.Hfma2I,      InstProps.Rd  | InstProps.Ra  | InstProps.Ib  | InstProps.Rc);
             Add("01110xxx1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2,       InstEmit.Hfma2C,      InstProps.Rd  | InstProps.Ra  | InstProps.Rc);
             Add("01100xxx1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2,       InstEmit.Hfma2Rc,     InstProps.Rd  | InstProps.Ra  | InstProps.Rc);
+            Add("0010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2,       InstEmit.Hfma232i,    InstProps.Rd  | InstProps.Ra);
             Add("0101110100001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2,       InstEmit.Hmul2R,      InstProps.Rd  | InstProps.Ra  | InstProps.Rb);
             Add("0111100x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2,       InstEmit.Hmul2I,      InstProps.Rd  | InstProps.Ra  | InstProps.Ib);
             Add("0111100x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2,       InstEmit.Hmul2C,      InstProps.Rd  | InstProps.Ra);
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs
index b0cb702857..c242963a6d 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs
@@ -54,18 +54,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
             context.Config.GpuAccessor.Log("Shader instruction Cctlt is not implemented.");
         }
 
-        public static void Cont(EmitterContext context)
-        {
-            InstCont op = context.GetOp<InstCont>();
-
-            context.Config.GpuAccessor.Log("Shader instruction ContUnsup is not implemented.");
-        }
-
         public static void Cset(EmitterContext context)
         {
             InstCset op = context.GetOp<InstCset>();
 
-            context.Config.GpuAccessor.Log("Shader instruction CsetUnsup is not implemented.");
+            context.Config.GpuAccessor.Log("Shader instruction Cset is not implemented.");
         }
 
         public static void Cs2r(EmitterContext context)
@@ -159,34 +152,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
             context.Config.GpuAccessor.Log("Shader instruction ImadspRc is not implemented.");
         }
 
-        public static void ImulR(EmitterContext context)
-        {
-            InstImulR op = context.GetOp<InstImulR>();
-
-            context.Config.GpuAccessor.Log("Shader instruction ImulR is not implemented.");
-        }
-
-        public static void ImulI(EmitterContext context)
-        {
-            InstImulI op = context.GetOp<InstImulI>();
-
-            context.Config.GpuAccessor.Log("Shader instruction ImulI is not implemented.");
-        }
-
-        public static void ImulC(EmitterContext context)
-        {
-            InstImulC op = context.GetOp<InstImulC>();
-
-            context.Config.GpuAccessor.Log("Shader instruction ImulC is not implemented.");
-        }
-
-        public static void Imul32i(EmitterContext context)
-        {
-            InstImul32i op = context.GetOp<InstImul32i>();
-
-            context.Config.GpuAccessor.Log("Shader instruction Imul32i is not implemented.");
-        }
-
         public static void Jcal(EmitterContext context)
         {
             InstJcal op = context.GetOp<InstJcal>();
@@ -250,13 +215,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
             context.Config.GpuAccessor.Log("Shader instruction P2rC is not implemented.");
         }
 
-        public static void Pcnt(EmitterContext context)
-        {
-            InstPcnt op = context.GetOp<InstPcnt>();
-
-            context.Config.GpuAccessor.Log("Shader instruction Pcnt is not implemented.");
-        }
-
         public static void Pexit(EmitterContext context)
         {
             InstPexit op = context.GetOp<InstPexit>();
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs
index 11d724c429..29803c31e8 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs
@@ -204,7 +204,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
 
             var srcA = GetSrcReg(context, op.SrcA);
             var srcB = GetSrcImm(context, op.Imm32);
-            var srcC = GetSrcReg(context, op.SrcC);
+            var srcC = GetSrcReg(context, op.Dest);
 
             EmitFfma(context, Instruction.FP32, srcA, srcB, srcC, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
         }
@@ -333,13 +333,13 @@ namespace Ryujinx.Graphics.Shader.Instructions
             EmitHfma2(context, op.OFmt, srcA, srcB, srcC, op.Dest, op.Sat);
         }
 
-        public static void Hfma232iI(EmitterContext context)
+        public static void Hfma232i(EmitterContext context)
         {
             InstHfma232i op = context.GetOp<InstHfma232i>();
 
             var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
             var srcB = GetHalfSrc(context, op.Imm);
-            var srcC = GetHalfSrc(context, HalfSwizzle.F16, op.SrcC, op.NegC, false);
+            var srcC = GetHalfSrc(context, HalfSwizzle.F16, op.Dest, op.NegC, false);
 
             EmitHfma2(context, OFmt.F16, srcA, srcB, srcC, op.Dest, saturate: false);
         }
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs
index da34c1be53..3cb8fe727b 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
         {
             InstBrk op = context.GetOp<InstBrk>();
 
-            EmitBrkOrSync(context);
+            EmitBrkContSync(context);
         }
 
         public static void Brx(EmitterContext context)
@@ -87,6 +87,13 @@ namespace Ryujinx.Graphics.Shader.Instructions
             }
         }
 
+        public static void Cont(EmitterContext context)
+        {
+            // CONT takes the same emission path as BRK and SYNC; the decoded operation fields are not needed.
+            _ = context.GetOp<InstCont>();
+            EmitBrkContSync(context);
+        }
+
         public static void Exit(EmitterContext context)
         {
             InstExit op = context.GetOp<InstExit>();
@@ -116,7 +123,14 @@ namespace Ryujinx.Graphics.Shader.Instructions
         {
             InstPbk op = context.GetOp<InstPbk>();
 
-            EmitPbkOrSsy(context);
+            EmitPbkPcntSsy(context);
+        }
+
+        public static void Pcnt(EmitterContext context)
+        {
+            // PCNT takes the same emission path as PBK and SSY; the decoded operation fields are not needed.
+            _ = context.GetOp<InstPcnt>();
+            EmitPbkPcntSsy(context);
         }
 
         public static void Ret(EmitterContext context)
@@ -137,17 +151,17 @@ namespace Ryujinx.Graphics.Shader.Instructions
         {
             InstSsy op = context.GetOp<InstSsy>();
 
-            EmitPbkOrSsy(context);
+            EmitPbkPcntSsy(context);
         }
 
         public static void Sync(EmitterContext context)
         {
             InstSync op = context.GetOp<InstSync>();
 
-            EmitBrkOrSync(context);
+            EmitBrkContSync(context);
         }
 
-        private static void EmitPbkOrSsy(EmitterContext context)
+        private static void EmitPbkPcntSsy(EmitterContext context)
         {
             var consumers = context.CurrBlock.PushOpCodes.First(x => x.Op.Address == context.CurrOp.Address).Consumers;
 
@@ -162,7 +176,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
             }
         }
 
-        private static void EmitBrkOrSync(EmitterContext context)
+        private static void EmitBrkContSync(EmitterContext context)
         {
             var targets = context.CurrBlock.SyncTargets;
 
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs
index ac8cca1b77..374e3d6143 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs
@@ -138,6 +138,46 @@ namespace Ryujinx.Graphics.Shader.Instructions
             EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
         }
 
+        public static void ImulR(EmitterContext context)
+        {
+            InstImulR imul = context.GetOp<InstImulR>();
+            var multiplicand = GetSrcReg(context, imul.SrcA);
+            var multiplier = GetSrcReg(context, imul.SrcB);
+
+            // IMUL (register form) is emitted through EmitImad with a constant zero as the addend.
+            EmitImad(context, multiplicand, multiplier, Const(0), imul.Dest, AvgMode.NoNeg, imul.ASigned, imul.BSigned, imul.Hilo);
+        }
+
+        public static void ImulI(EmitterContext context)
+        {
+            InstImulI imul = context.GetOp<InstImulI>();
+            var multiplicand = GetSrcReg(context, imul.SrcA);
+            var multiplier = GetSrcImm(context, Imm20ToSInt(imul.Imm20));
+
+            // IMUL (20-bit immediate form) is emitted through EmitImad with a constant zero as the addend.
+            EmitImad(context, multiplicand, multiplier, Const(0), imul.Dest, AvgMode.NoNeg, imul.ASigned, imul.BSigned, imul.Hilo);
+        }
+
+        public static void ImulC(EmitterContext context)
+        {
+            InstImulC imul = context.GetOp<InstImulC>();
+            var multiplicand = GetSrcReg(context, imul.SrcA);
+            var multiplier = GetSrcCbuf(context, imul.CbufSlot, imul.CbufOffset);
+
+            // IMUL (constant buffer form) is emitted through EmitImad with a constant zero as the addend.
+            EmitImad(context, multiplicand, multiplier, Const(0), imul.Dest, AvgMode.NoNeg, imul.ASigned, imul.BSigned, imul.Hilo);
+        }
+
+        public static void Imul32i(EmitterContext context)
+        {
+            InstImul32i imul = context.GetOp<InstImul32i>();
+            var multiplicand = GetSrcReg(context, imul.SrcA);
+            var multiplier = GetSrcImm(context, imul.Imm32);
+
+            // IMUL32I is emitted through EmitImad with a constant zero as the addend.
+            EmitImad(context, multiplicand, multiplier, Const(0), imul.Dest, AvgMode.NoNeg, imul.ASigned, imul.BSigned, imul.Hilo);
+        }
+
         public static void IscaddR(EmitterContext context)
         {
             InstIscaddR op = context.GetOp<InstIscaddR>();
@@ -366,7 +406,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
             // TODO: CC, X, corner cases.
         }
 
-        public static void EmitImad(
+        private static void EmitImad(
             EmitterContext context,
             Operand srcA,
             Operand srcB,
@@ -407,7 +447,10 @@ namespace Ryujinx.Graphics.Shader.Instructions
                 res = context.IMultiply(srcA, srcB);
             }
 
-            res = context.IAdd(res, srcC);
+            if (srcC.Type != OperandType.Constant || srcC.Value != 0)
+            {
+                res = context.IAdd(res, srcC);
+            }
 
             // TODO: CC, X, SAT, and more?
 
diff --git a/Ryujinx.Graphics.Shader/Translation/Translator.cs b/Ryujinx.Graphics.Shader/Translation/Translator.cs
index cef25350a3..709b16db44 100644
--- a/Ryujinx.Graphics.Shader/Translation/Translator.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Translator.cs
@@ -295,7 +295,7 @@ namespace Ryujinx.Graphics.Shader.Translation
 
                 Operand predSkipLbl = null;
 
-                if (op.Name == InstName.Sync || op.Name == InstName.Brk)
+                if (Decoder.IsPopBranch(op.Name))
                 {
                     // If the instruction is a SYNC or BRK instruction with only one
                     // possible target address, then the instruction is basically