From 86b37d0ff7764ac62b1e9578b07a8b648a3bd55a Mon Sep 17 00:00:00 2001
From: merry <git@mary.rs>
Date: Tue, 8 Feb 2022 09:46:42 +0000
Subject: [PATCH] ARMeilleure: A32: Implement SHSUB8 and UHSUB8 (#3089)

* ARMeilleure: A32: Implement UHSUB8

* ARMeilleure: A32: Implement SHSUB8
---
 ARMeilleure/Decoders/OpCodeTable.cs       |  2 +
 ARMeilleure/Instructions/InstEmitAlu32.cs | 45 +++++++++++++++++++++++
 ARMeilleure/Instructions/InstName.cs      |  2 +
 Ryujinx.Tests/Cpu/CpuTestAlu32.cs         | 38 +++++++++++++++++++
 4 files changed, 87 insertions(+)

diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index d54bb5a511..53328a7357 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -734,6 +734,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<0111101xxxxxxxxxxxxxx101xxxx", InstName.Sbfx,    InstEmit32.Sbfx,    OpCode32AluBf.Create);
             SetA32("<<<<01110001xxxx1111xxxx0001xxxx", InstName.Sdiv,    InstEmit32.Sdiv,    OpCode32AluMla.Create);
             SetA32("<<<<01100011xxxxxxxx11111001xxxx", InstName.Shadd8,  InstEmit32.Shadd8,  OpCode32AluReg.Create);
+            SetA32("<<<<01100011xxxxxxxx11111111xxxx", InstName.Shsub8,  InstEmit32.Shsub8,  OpCode32AluReg.Create);
             SetA32("<<<<00010000xxxxxxxxxxxx1xx0xxxx", InstName.Smla__,  InstEmit32.Smla__,  OpCode32AluMla.Create);
             SetA32("<<<<0000111xxxxxxxxxxxxx1001xxxx", InstName.Smlal,   InstEmit32.Smlal,   OpCode32AluUmull.Create);
             SetA32("<<<<00010100xxxxxxxxxxxx1xx0xxxx", InstName.Smlal__, InstEmit32.Smlal__, OpCode32AluUmull.Create);
@@ -782,6 +783,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<0111111xxxxxxxxxxxxxx101xxxx", InstName.Ubfx,    InstEmit32.Ubfx,    OpCode32AluBf.Create);
             SetA32("<<<<01110011xxxx1111xxxx0001xxxx", InstName.Udiv,    InstEmit32.Udiv,    OpCode32AluMla.Create);
             SetA32("<<<<01100111xxxxxxxx11111001xxxx", InstName.Uhadd8,  InstEmit32.Uhadd8,  OpCode32AluReg.Create);
+            SetA32("<<<<01100111xxxxxxxx11111111xxxx", InstName.Uhsub8,  InstEmit32.Uhsub8,  OpCode32AluReg.Create);
             SetA32("<<<<00000100xxxxxxxxxxxx1001xxxx", InstName.Umaal,   InstEmit32.Umaal,   OpCode32AluUmull.Create);
             SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal,   InstEmit32.Umlal,   OpCode32AluUmull.Create);
             SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull,   InstEmit32.Umull,   OpCode32AluUmull.Create);
diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs
index 9aff0261ca..66b8a8a7ee 100644
--- a/ARMeilleure/Instructions/InstEmitAlu32.cs
+++ b/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -392,6 +392,11 @@ namespace ARMeilleure.Instructions
             EmitHadd8(context, false);
         }
 
+        public static void Shsub8(ArmEmitterContext context) // Emitter for the A32 SHSUB8 instruction.
+        {
+            EmitHsub8(context, false); // unsigned == false: the helper also applies its signed fix-up step.
+        }
+
         public static void Ssat(ArmEmitterContext context)
         {
             OpCode32Sat op = (OpCode32Sat)context.CurrOp;
@@ -482,6 +487,11 @@ namespace ARMeilleure.Instructions
             EmitHadd8(context, true);
         }
 
+        public static void Uhsub8(ArmEmitterContext context) // Emitter for the A32 UHSUB8 instruction.
+        {
+            EmitHsub8(context, true); // unsigned == true: the helper skips its signed fix-up step.
+        }
+
         public static void Usat(ArmEmitterContext context)
         {
             OpCode32Sat op = (OpCode32Sat)context.CurrOp;
@@ -681,6 +691,41 @@ namespace ARMeilleure.Instructions
             SetIntA32(context, op.Rd, res);
         }
 
+        private static void EmitHsub8(ArmEmitterContext context, bool unsigned) // Shared body of Shsub8/Uhsub8.
+        {
+            OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
+
+            Operand m = GetIntA32(context, op.Rm);
+            Operand n = GetIntA32(context, op.Rn);
+            Operand left, right, carry, res;
+
+            // With x = n (Rn) and y = m (Rm), this relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
+            // Note that x^y always contains the LSB of the difference.
+            // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
+
+            carry = context.BitwiseExclusiveOr(m, n); // x^y
+            left = context.ShiftRightUI(carry, Const(1)); // (x^y)>>1; bit 7 of each byte now holds a stray bit, overwritten below.
+            right = context.BitwiseAnd(carry, m); // (x^y)&y
+
+            // We must now perform a partitioned subtraction (four independent byte lanes in one 32-bit value).
+            // We can do this because minuend contains 7 bit fields.
+            // We use the extra bit in minuend (bit 7 of each byte, mask 0x80808080) as a bit to borrow from; we set this bit.
+            // We invert this bit at the end as this tells us if that bit was borrowed from.
+
+            res = context.BitwiseOr(left, Const(0x80808080));
+            res = context.Subtract(res, right);
+            res = context.BitwiseExclusiveOr(res, Const(0x80808080));
+
+            if (!unsigned)
+            {
+                // We then sign extend the result into this bit.
+                carry = context.BitwiseAnd(carry, Const(0x80808080)); // Keep only bit 7 of x^y in each byte.
+                res = context.BitwiseExclusiveOr(res, carry);
+            }
+
+            SetIntA32(context, op.Rd, res);
+        }
+
         private static void EmitSat(ArmEmitterContext context, int intMin, int intMax)
         {
             OpCode32Sat op = (OpCode32Sat)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index 698979b9a9..3e01649580 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -80,6 +80,7 @@ namespace ARMeilleure.Instructions
         Sbcs,
         Sbfm,
         Sdiv,
+        Shsub8,
         Smaddl,
         Smsubl,
         Smulh,
@@ -546,6 +547,7 @@ namespace ARMeilleure.Instructions
         Tst,
         Ubfx,
         Uhadd8,
+        Uhsub8,
         Umaal,
         Umlal,
         Umull,
diff --git a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
index 7a30f138ea..170bf98bd2 100644
--- a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
@@ -96,6 +96,25 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise]
+        public void Shsub8([Values(0u, 0xdu)] uint rd,
+                           [Values(1u)] uint rm,
+                           [Values(2u)] uint rn,
+                           [Random(RndCnt)] uint w0,
+                           [Random(RndCnt)] uint w1,
+                           [Random(RndCnt)] uint w2)
+        {
+            uint opcode = 0xE6300FF0u; // SHSUB8 R0, R0, R0
+
+            opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16); // Rm -> bits [3:0], Rd -> [15:12], Rn -> [19:16].
+
+            uint sp = TestContext.CurrentContext.Random.NextUInt(); // Random SP value; rd may be 0xd.
+
+            SingleOpcode(opcode, r0: w0, r1: w1, r2: w2, sp: sp);
+
+            CompareAgainstUnicorn();
+        }
+
         [Test, Pairwise]
         public void Ssat_Usat([ValueSource("_Ssat_Usat_")] uint opcode,
                               [Values(0u, 0xdu)] uint rd,
@@ -149,6 +168,25 @@ namespace Ryujinx.Tests.Cpu
 
             CompareAgainstUnicorn();
         }
+
+        [Test, Pairwise]
+        public void Uhsub8([Values(0u, 0xdu)] uint rd,
+                           [Values(1u)] uint rm,
+                           [Values(2u)] uint rn,
+                           [Random(RndCnt)] uint w0,
+                           [Random(RndCnt)] uint w1,
+                           [Random(RndCnt)] uint w2)
+        {
+            uint opcode = 0xE6700FF0u; // UHSUB8 R0, R0, R0
+
+            opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16); // Rm -> bits [3:0], Rd -> [15:12], Rn -> [19:16].
+
+            uint sp = TestContext.CurrentContext.Random.NextUInt(); // Random SP value; rd may be 0xd.
+
+            SingleOpcode(opcode, r0: w0, r1: w1, r2: w2, sp: sp);
+
+            CompareAgainstUnicorn();
+        }
 #endif
     }
 }