diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 78537787de..d54bb5a511 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -733,6 +733,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<0000110xxxxxxxxxxxxx0xx1xxxx", InstName.Sbc,     InstEmit32.Sbc,     OpCode32AluRsReg.Create);
             SetA32("<<<<0111101xxxxxxxxxxxxxx101xxxx", InstName.Sbfx,    InstEmit32.Sbfx,    OpCode32AluBf.Create);
             SetA32("<<<<01110001xxxx1111xxxx0001xxxx", InstName.Sdiv,    InstEmit32.Sdiv,    OpCode32AluMla.Create);
+            SetA32("<<<<01100011xxxxxxxx11111001xxxx", InstName.Shadd8,  InstEmit32.Shadd8,  OpCode32AluReg.Create);
             SetA32("<<<<00010000xxxxxxxxxxxx1xx0xxxx", InstName.Smla__,  InstEmit32.Smla__,  OpCode32AluMla.Create);
             SetA32("<<<<0000111xxxxxxxxxxxxx1001xxxx", InstName.Smlal,   InstEmit32.Smlal,   OpCode32AluUmull.Create);
             SetA32("<<<<00010100xxxxxxxxxxxx1xx0xxxx", InstName.Smlal__, InstEmit32.Smlal__, OpCode32AluUmull.Create);
diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs
index 5f55fcd19b..9aff0261ca 100644
--- a/ARMeilleure/Instructions/InstEmitAlu32.cs
+++ b/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -387,6 +387,11 @@ namespace ARMeilleure.Instructions
             EmitDiv(context, false);
         }
 
+        public static void Shadd8(ArmEmitterContext context)
+        {
+            EmitHadd8(context, false); // unsigned: false => signed per-byte halving add
+        }
+
         public static void Ssat(ArmEmitterContext context)
         {
             OpCode32Sat op = (OpCode32Sat)context.CurrOp;
@@ -474,20 +479,7 @@ namespace ARMeilleure.Instructions
 
         public static void Uhadd8(ArmEmitterContext context)
         {
-            OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
-
-            Operand m = GetIntA32(context, op.Rm);
-            Operand n = GetIntA32(context, op.Rn);
-
-            Operand xor, res;
-
-            res = context.BitwiseAnd(m, n);
-            xor = context.BitwiseExclusiveOr(m, n);
-            xor = context.ShiftRightUI(xor, Const(1));
-            xor = context.BitwiseAnd(xor, Const(0x7F7F7F7Fu));
-            res = context.Add(res, xor);
-
-            SetIntA32(context, op.Rd, res);
+            EmitHadd8(context, true); // unsigned: true => same and/xor/shift/mask/add sequence as the inline code it replaces
         }
 
         public static void Usat(ArmEmitterContext context)
@@ -659,6 +651,36 @@ namespace ARMeilleure.Instructions
             context.MarkLabel(lblEnd);
         }
 
+        private static void EmitHadd8(ArmEmitterContext context, bool unsigned)
+        {
+            OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
+
+            Operand m = GetIntA32(context, op.Rm);
+            Operand n = GetIntA32(context, op.Rn);
+
+            Operand xor, res, carry;
+
+            // Per-byte halving add: every 8-bit lane of Rd receives (Rn lane + Rm lane) / 2, computed on all lanes at once.
+            // This relies on x+y == ((x&y) << 1) + (x^y), hence (x+y)/2 == (x&y) + ((x^y)>>1); x^y holds the LSB of the sum.
+            // The 32-bit right shift moves bit 0 of each lane into bit 7 of the lane below it,
+            // so we mask every lane with 0x7F to drop that leaked bit before adding.
+
+            res = context.BitwiseAnd(m, n);
+            carry = context.BitwiseExclusiveOr(m, n); // NOTE: despite its name, this holds x^y; it is reused by the signed fix-up below.
+            xor = context.ShiftRightUI(carry, Const(1));
+            xor = context.BitwiseAnd(xor, Const(0x7F7F7F7Fu));
+            res = context.Add(res, xor);
+
+            if (!unsigned)
+            {
+                // Signed fix-up: sign-extending both inputs toggles bit 8 of the 9-bit sum by x7^y7, which is bit 7 after halving.
+                carry = context.BitwiseAnd(carry, Const(0x80808080u));
+                res = context.BitwiseExclusiveOr(res, carry);
+            }
+
+            SetIntA32(context, op.Rd, res);
+        }
+
         private static void EmitSat(ArmEmitterContext context, int intMin, int intMax)
         {
             OpCode32Sat op = (OpCode32Sat)context.CurrOp;
@@ -772,4 +794,4 @@ namespace ARMeilleure.Instructions
             EmitGenericAluStoreA32(context, op.Rd, op.SetFlags, value);
         }
     }
-}
\ No newline at end of file
+}
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index a520c86a34..698979b9a9 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -516,6 +516,7 @@ namespace ARMeilleure.Instructions
         Rsb,
         Rsc,
         Sbfx,
+        Shadd8,
         Smla__,
         Smlal,
         Smlal__,
diff --git a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
index 1867e27fe4..7a30f138ea 100644
--- a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
@@ -77,6 +77,25 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise]
+        public void Shadd8([Values(0u, 0xdu)] uint rd,
+                           [Values(1u)] uint rm,
+                           [Values(2u)] uint rn,
+                           [Random(RndCnt)] uint w0,
+                           [Random(RndCnt)] uint w1,
+                           [Random(RndCnt)] uint w2)
+        {
+            uint opcode = 0xE6300F90u; // SHADD8 R0, R0, R0 (base encoding; register fields are OR'ed in below)
+
+            opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16); // Rm -> bits 3:0, Rd -> bits 15:12, Rn -> bits 19:16
+
+            uint sp = TestContext.CurrentContext.Random.NextUInt(); // presumably randomized because rd may be 0xd (R13/SP) - confirm
+
+            SingleOpcode(opcode, r0: w0, r1: w1, r2: w2, sp: sp);
+
+            CompareAgainstUnicorn();
+        }
+
         [Test, Pairwise]
         public void Ssat_Usat([ValueSource("_Ssat_Usat_")] uint opcode,
                               [Values(0u, 0xdu)] uint rd,
@@ -120,7 +139,7 @@ namespace Ryujinx.Tests.Cpu
                            [Random(RndCnt)] uint w1,
                            [Random(RndCnt)] uint w2)
         {
-            uint opcode = 0xE6700F90u; //UHADD8 R0, R0, R0
+            uint opcode = 0xE6700F90u; // UHADD8 R0, R0, R0
 
             opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16);