From 5f34353dce716bca2e3fc1c7e82be6276b95d61a Mon Sep 17 00:00:00 2001
From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
Date: Sat, 4 Aug 2018 21:58:54 +0200
Subject: [PATCH] Add SQADD, UQADD, SQSUB, UQSUB, SUQADD, USQADD, SQABS, SQNEG
 (Scalar, Vector) instructions; add 24 Tests. Most saturation instructions now
 on ASoftFallback. (#314)

* Update AOpCodeTable.cs

* Update AInstEmitSimdHelper.cs

* Update AInstEmitSimdArithmetic.cs

* Update Pseudocode.cs

* Update Instructions.cs

* Update CpuTestSimd.cs

* Update CpuTestSimdReg.cs

* Update AInstEmitSimdHelper.cs

* Update AInstEmitSimdHelper.cs

* Update AInstEmitSimdHelper.cs

* Update AInstEmitSimdHelper.cs

* Update ASoftFallback.cs

* Update AInstEmitSimdHelper.cs

* Update ASoftFallback.cs

* Update AInstEmitSimdHelper.cs

* Update CpuTestSimd.cs

* Update CpuTestSimdReg.cs

* Update ASoftFallback.cs

* Update AInstEmitSimdHelper.cs

* Opt. (retest).
---
 ChocolArm64/AOpCodeTable.cs                   |  16 +
 .../Instruction/AInstEmitSimdArithmetic.cs    |  80 ++
 .../Instruction/AInstEmitSimdHelper.cs        | 394 +++++++--
 ChocolArm64/Instruction/ASoftFallback.cs      | 268 ++++++
 Ryujinx.Tests/Cpu/CpuTestSimd.cs              | 446 +++++++++-
 Ryujinx.Tests/Cpu/CpuTestSimdReg.cs           | 431 ++++++++++
 Ryujinx.Tests/Cpu/Tester/Instructions.cs      | 776 ++++++++++++++++++
 Ryujinx.Tests/Cpu/Tester/Pseudocode.cs        |   4 +-
 8 files changed, 2330 insertions(+), 85 deletions(-)

diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs
index aa8220e38d..aa8b1be606 100644
--- a/ChocolArm64/AOpCodeTable.cs
+++ b/ChocolArm64/AOpCodeTable.cs
@@ -374,7 +374,15 @@ namespace ChocolArm64
             SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", AInstEmit.Smlal_V,       typeof(AOpCodeSimdReg));
             SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", AInstEmit.Smlsl_V,       typeof(AOpCodeSimdReg));
             SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Smull_V,       typeof(AOpCodeSimdReg));
+            SetA64("01011110xx100000011110xxxxxxxxxx", AInstEmit.Sqabs_S,       typeof(AOpCodeSimd));
+            SetA64("0>001110<<100000011110xxxxxxxxxx", AInstEmit.Sqabs_V,       typeof(AOpCodeSimd));
+            SetA64("01011110xx1xxxxx000011xxxxxxxxxx", AInstEmit.Sqadd_S,       typeof(AOpCodeSimdReg));
+            SetA64("0>001110<<1xxxxx000011xxxxxxxxxx", AInstEmit.Sqadd_V,       typeof(AOpCodeSimdReg));
+            SetA64("01111110xx100000011110xxxxxxxxxx", AInstEmit.Sqneg_S,       typeof(AOpCodeSimd));
+            SetA64("0>101110<<100000011110xxxxxxxxxx", AInstEmit.Sqneg_V,       typeof(AOpCodeSimd));
             SetA64("0x00111100>>>xxx100111xxxxxxxxxx", AInstEmit.Sqrshrn_V,     typeof(AOpCodeSimdShImm));
+            SetA64("01011110xx1xxxxx001011xxxxxxxxxx", AInstEmit.Sqsub_S,       typeof(AOpCodeSimdReg));
+            SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", AInstEmit.Sqsub_V,       typeof(AOpCodeSimdReg));
             SetA64("01011110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_S,       typeof(AOpCodeSimd));
             SetA64("0x001110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_V,       typeof(AOpCodeSimd));
             SetA64("01111110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_S,      typeof(AOpCodeSimd));
@@ -402,6 +410,8 @@ namespace ChocolArm64
             SetA64("01111110111xxxxx100001xxxxxxxxxx", AInstEmit.Sub_S,         typeof(AOpCodeSimdReg));
             SetA64("0>101110<<1xxxxx100001xxxxxxxxxx", AInstEmit.Sub_V,         typeof(AOpCodeSimdReg));
             SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", AInstEmit.Subhn_V,       typeof(AOpCodeSimdReg));
+            SetA64("01011110xx100000001110xxxxxxxxxx", AInstEmit.Suqadd_S,      typeof(AOpCodeSimd));
+            SetA64("0>001110<<100000001110xxxxxxxxxx", AInstEmit.Suqadd_V,      typeof(AOpCodeSimd));
             SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", AInstEmit.Tbl_V,         typeof(AOpCodeSimdTbl));
             SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", AInstEmit.Trn1_V,        typeof(AOpCodeSimdReg));
             SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", AInstEmit.Trn2_V,        typeof(AOpCodeSimdReg));
@@ -423,6 +433,10 @@ namespace ChocolArm64
             SetA64("0x101110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Uminp_V,       typeof(AOpCodeSimdReg));
             SetA64("0x001110000xxxxx001111xxxxxxxxxx", AInstEmit.Umov_S,        typeof(AOpCodeSimdIns));
             SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Umull_V,       typeof(AOpCodeSimdReg));
+            SetA64("01111110xx1xxxxx000011xxxxxxxxxx", AInstEmit.Uqadd_S,       typeof(AOpCodeSimdReg));
+            SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", AInstEmit.Uqadd_V,       typeof(AOpCodeSimdReg));
+            SetA64("01111110xx1xxxxx001011xxxxxxxxxx", AInstEmit.Uqsub_S,       typeof(AOpCodeSimdReg));
+            SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", AInstEmit.Uqsub_V,       typeof(AOpCodeSimdReg));
             SetA64("01111110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_S,       typeof(AOpCodeSimd));
             SetA64("0x101110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_V,       typeof(AOpCodeSimd));
             SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Ushl_V,        typeof(AOpCodeSimdReg));
@@ -430,6 +444,8 @@ namespace ChocolArm64
             SetA64("0111111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_S,        typeof(AOpCodeSimdShImm));
             SetA64("0x10111100>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V,        typeof(AOpCodeSimdShImm));
             SetA64("0110111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_V,        typeof(AOpCodeSimdShImm));
+            SetA64("01111110xx100000001110xxxxxxxxxx", AInstEmit.Usqadd_S,      typeof(AOpCodeSimd));
+            SetA64("0>101110<<100000001110xxxxxxxxxx", AInstEmit.Usqadd_V,      typeof(AOpCodeSimd));
             SetA64("0x10111100>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V,        typeof(AOpCodeSimdShImm));
             SetA64("0110111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_V,        typeof(AOpCodeSimdShImm));
             SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", AInstEmit.Usubw_V,       typeof(AOpCodeSimdReg));
diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index 8e7418611e..6772fe83b7 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -1052,6 +1052,46 @@ namespace ChocolArm64.Instruction
             EmitVectorWidenRnRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Mul));
         }
 
+        public static void Sqabs_S(AILEmitterCtx Context)
+        {
+            EmitScalarSaturatingUnaryOpSx(Context, () => EmitAbs(Context));
+        }
+
+        public static void Sqabs_V(AILEmitterCtx Context)
+        {
+            EmitVectorSaturatingUnaryOpSx(Context, () => EmitAbs(Context));
+        }
+
+        public static void Sqadd_S(AILEmitterCtx Context)
+        {
+            EmitScalarSaturatingBinaryOpSx(Context, SaturatingFlags.Add);
+        }
+
+        public static void Sqadd_V(AILEmitterCtx Context)
+        {
+            EmitVectorSaturatingBinaryOpSx(Context, SaturatingFlags.Add);
+        }
+
+        public static void Sqneg_S(AILEmitterCtx Context)
+        {
+            EmitScalarSaturatingUnaryOpSx(Context, () => Context.Emit(OpCodes.Neg));
+        }
+
+        public static void Sqneg_V(AILEmitterCtx Context)
+        {
+            EmitVectorSaturatingUnaryOpSx(Context, () => Context.Emit(OpCodes.Neg));
+        }
+
+        public static void Sqsub_S(AILEmitterCtx Context)
+        {
+            EmitScalarSaturatingBinaryOpSx(Context, SaturatingFlags.Sub);
+        }
+
+        public static void Sqsub_V(AILEmitterCtx Context)
+        {
+            EmitVectorSaturatingBinaryOpSx(Context, SaturatingFlags.Sub);
+        }
+
         public static void Sqxtn_S(AILEmitterCtx Context)
         {
             EmitScalarSaturatingNarrowOpSxSx(Context, () => { });
@@ -1099,6 +1139,16 @@ namespace ChocolArm64.Instruction
             EmitHighNarrow(Context, () => Context.Emit(OpCodes.Sub), Round: false);
         }
 
+        public static void Suqadd_S(AILEmitterCtx Context)
+        {
+            EmitScalarSaturatingBinaryOpSx(Context, SaturatingFlags.Accumulate);
+        }
+
+        public static void Suqadd_V(AILEmitterCtx Context)
+        {
+            EmitVectorSaturatingBinaryOpSx(Context, SaturatingFlags.Accumulate);
+        }
+
         public static void Uaba_V(AILEmitterCtx Context)
         {
             EmitVectorTernaryOpZx(Context, () =>
@@ -1221,6 +1271,26 @@ namespace ChocolArm64.Instruction
             EmitVectorWidenRnRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Mul));
         }
 
+        public static void Uqadd_S(AILEmitterCtx Context)
+        {
+            EmitScalarSaturatingBinaryOpZx(Context, SaturatingFlags.Add);
+        }
+
+        public static void Uqadd_V(AILEmitterCtx Context)
+        {
+            EmitVectorSaturatingBinaryOpZx(Context, SaturatingFlags.Add);
+        }
+
+        public static void Uqsub_S(AILEmitterCtx Context)
+        {
+            EmitScalarSaturatingBinaryOpZx(Context, SaturatingFlags.Sub);
+        }
+
+        public static void Uqsub_V(AILEmitterCtx Context)
+        {
+            EmitVectorSaturatingBinaryOpZx(Context, SaturatingFlags.Sub);
+        }
+
         public static void Uqxtn_S(AILEmitterCtx Context)
         {
             EmitScalarSaturatingNarrowOpZxZx(Context, () => { });
@@ -1231,6 +1301,16 @@ namespace ChocolArm64.Instruction
             EmitVectorSaturatingNarrowOpZxZx(Context, () => { });
         }
 
+        public static void Usqadd_S(AILEmitterCtx Context)
+        {
+            EmitScalarSaturatingBinaryOpZx(Context, SaturatingFlags.Accumulate);
+        }
+
+        public static void Usqadd_V(AILEmitterCtx Context)
+        {
+            EmitVectorSaturatingBinaryOpZx(Context, SaturatingFlags.Accumulate);
+        }
+
         public static void Usubw_V(AILEmitterCtx Context)
         {
             EmitVectorWidenRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub));
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
index 0bd1a62926..161c44ea26 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@@ -336,17 +336,21 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            if (Opers.HasFlag(OperFlags.Rd))
+            bool Rd = (Opers & OperFlags.Rd) != 0;
+            bool Rn = (Opers & OperFlags.Rn) != 0;
+            bool Rm = (Opers & OperFlags.Rm) != 0;
+
+            if (Rd)
             {
                 EmitVectorExtract(Context, Op.Rd, 0, Op.Size, Signed);
             }
 
-             if (Opers.HasFlag(OperFlags.Rn))
+            if (Rn)
             {
                 EmitVectorExtract(Context, Op.Rn, 0, Op.Size, Signed);
             }
 
-            if (Opers.HasFlag(OperFlags.Rm))
+            if (Rm)
             {
                 EmitVectorExtract(Context, ((AOpCodeSimdReg)Op).Rm, 0, Op.Size, Signed);
             }
@@ -377,17 +381,21 @@ namespace ChocolArm64.Instruction
 
             int SizeF = Op.Size & 1;
 
-            if (Opers.HasFlag(OperFlags.Ra))
+            bool Ra = (Opers & OperFlags.Ra) != 0;
+            bool Rn = (Opers & OperFlags.Rn) != 0;
+            bool Rm = (Opers & OperFlags.Rm) != 0;
+
+            if (Ra)
             {
                 EmitVectorExtractF(Context, ((AOpCodeSimdReg)Op).Ra, 0, SizeF);
             }
 
-            if (Opers.HasFlag(OperFlags.Rn))
+            if (Rn)
             {
                 EmitVectorExtractF(Context, Op.Rn, 0, SizeF);
             }
 
-            if (Opers.HasFlag(OperFlags.Rm))
+            if (Rm)
             {
                 EmitVectorExtractF(Context, ((AOpCodeSimdReg)Op).Rm, 0, SizeF);
             }
@@ -769,7 +777,7 @@ namespace ChocolArm64.Instruction
                 Emit();
 
                 EmitVectorInsertTmp(Context, Pairs + Index, Op.Size);
-                EmitVectorInsertTmp(Context, Index,         Op.Size);
+                EmitVectorInsertTmp(Context,         Index, Op.Size);
             }
 
             Context.EmitLdvectmp();
@@ -781,56 +789,241 @@ namespace ChocolArm64.Instruction
             }
         }
 
+        [Flags]
+        public enum SaturatingFlags
+        {
+            Scalar = 1 << 0,
+            Signed = 1 << 1,
+
+            Add = 1 << 2,
+            Sub = 1 << 3,
+
+            Accumulate = 1 << 4,
+
+            ScalarSx = Scalar | Signed,
+            ScalarZx = Scalar,
+
+            VectorSx = Signed,
+            VectorZx = 0,
+        }
+
+        public static void EmitScalarSaturatingUnaryOpSx(AILEmitterCtx Context, Action Emit)
+        {
+            EmitSaturatingUnaryOpSx(Context, Emit, SaturatingFlags.ScalarSx);
+        }
+
+        public static void EmitVectorSaturatingUnaryOpSx(AILEmitterCtx Context, Action Emit)
+        {
+            EmitSaturatingUnaryOpSx(Context, Emit, SaturatingFlags.VectorSx);
+        }
+
+        public static void EmitSaturatingUnaryOpSx(AILEmitterCtx Context, Action Emit, SaturatingFlags Flags)
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+            bool Scalar = (Flags & SaturatingFlags.Scalar) != 0;
+
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = !Scalar ? Bytes >> Op.Size : 1;
+
+            if (Scalar)
+            {
+                EmitVectorZeroLowerTmp(Context);
+            }
+
+            for (int Index = 0; Index < Elems; Index++)
+            {
+                EmitVectorExtractSx(Context, Op.Rn, Index, Op.Size);
+
+                Emit();
+
+                EmitUnarySignedSatQAbsOrNeg(Context, Op.Size);
+
+                EmitVectorInsertTmp(Context, Index, Op.Size);
+            }
+
+            Context.EmitLdvectmp();
+            Context.EmitStvec(Op.Rd);
+
+            if ((Op.RegisterSize == ARegisterSize.SIMD64) || Scalar)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
+        }
+
+        public static void EmitScalarSaturatingBinaryOpSx(AILEmitterCtx Context, SaturatingFlags Flags)
+        {
+            EmitSaturatingBinaryOp(Context, SaturatingFlags.ScalarSx | Flags);
+        }
+
+        public static void EmitScalarSaturatingBinaryOpZx(AILEmitterCtx Context, SaturatingFlags Flags)
+        {
+            EmitSaturatingBinaryOp(Context, SaturatingFlags.ScalarZx | Flags);
+        }
+
+        public static void EmitVectorSaturatingBinaryOpSx(AILEmitterCtx Context, SaturatingFlags Flags)
+        {
+            EmitSaturatingBinaryOp(Context, SaturatingFlags.VectorSx | Flags);
+        }
+
+        public static void EmitVectorSaturatingBinaryOpZx(AILEmitterCtx Context, SaturatingFlags Flags)
+        {
+            EmitSaturatingBinaryOp(Context, SaturatingFlags.VectorZx | Flags);
+        }
+
+        public static void EmitSaturatingBinaryOp(AILEmitterCtx Context, SaturatingFlags Flags)
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+            bool Scalar = (Flags & SaturatingFlags.Scalar) != 0;
+            bool Signed = (Flags & SaturatingFlags.Signed) != 0;
+
+            bool Add = (Flags & SaturatingFlags.Add) != 0;
+            bool Sub = (Flags & SaturatingFlags.Sub) != 0;
+
+            bool Accumulate = (Flags & SaturatingFlags.Accumulate) != 0;
+
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = !Scalar ? Bytes >> Op.Size : 1;
+
+            if (Scalar)
+            {
+                EmitVectorZeroLowerTmp(Context);
+            }
+
+            if (Add || Sub)
+            {
+                for (int Index = 0; Index < Elems; Index++)
+                {
+                    EmitVectorExtract(Context,                   Op.Rn, Index, Op.Size, Signed);
+                    EmitVectorExtract(Context, ((AOpCodeSimdReg)Op).Rm, Index, Op.Size, Signed);
+
+                    if (Op.Size <= 2)
+                    {
+                        Context.Emit(Add ? OpCodes.Add : OpCodes.Sub);
+
+                        EmitSatQ(Context, Op.Size, true, Signed);
+                    }
+                    else /* if (Op.Size == 3) */
+                    {
+                        if (Add)
+                        {
+                            EmitBinarySatQAdd(Context, Signed);
+                        }
+                        else /* if (Sub) */
+                        {
+                            EmitBinarySatQSub(Context, Signed);
+                        }
+                    }
+
+                    EmitVectorInsertTmp(Context, Index, Op.Size);
+                }
+            }
+            else if (Accumulate)
+            {
+                for (int Index = 0; Index < Elems; Index++)
+                {
+                    EmitVectorExtract(Context, Op.Rn, Index, Op.Size, !Signed);
+                    EmitVectorExtract(Context, Op.Rd, Index, Op.Size,  Signed);
+
+                    if (Op.Size <= 2)
+                    {
+                        Context.Emit(OpCodes.Add);
+
+                        EmitSatQ(Context, Op.Size, true, Signed);
+                    }
+                    else /* if (Op.Size == 3) */
+                    {
+                        EmitBinarySatQAccumulate(Context, Signed);
+                    }
+
+                    EmitVectorInsertTmp(Context, Index, Op.Size);
+                }
+            }
+
+            Context.EmitLdvectmp();
+            Context.EmitStvec(Op.Rd);
+
+            if ((Op.RegisterSize == ARegisterSize.SIMD64) || Scalar)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
+        }
+
+        [Flags]
+        public enum SaturatingNarrowFlags
+        {
+            Scalar    = 1 << 0,
+            SignedSrc = 1 << 1,
+            SignedDst = 1 << 2,
+
+            ScalarSxSx = Scalar | SignedSrc | SignedDst,
+            ScalarSxZx = Scalar | SignedSrc,
+            ScalarZxSx = Scalar | SignedDst,
+            ScalarZxZx = Scalar,
+
+            VectorSxSx = SignedSrc | SignedDst,
+            VectorSxZx = SignedSrc,
+            VectorZxSx = SignedDst,
+            VectorZxZx = 0
+        }
+
         public static void EmitScalarSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit)
         {
-            EmitSaturatingNarrowOp(Context, Emit, true, true, true);
+            EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarSxSx);
         }
 
         public static void EmitScalarSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit)
         {
-            EmitSaturatingNarrowOp(Context, Emit, true, false, true);
+            EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarSxZx);
+        }
+
+        public static void EmitScalarSaturatingNarrowOpZxSx(AILEmitterCtx Context, Action Emit)
+        {
+            EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarZxSx);
         }
 
         public static void EmitScalarSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit)
         {
-            EmitSaturatingNarrowOp(Context, Emit, false, false, true);
+            EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarZxZx);
         }
 
         public static void EmitVectorSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit)
         {
-            EmitSaturatingNarrowOp(Context, Emit, true, true, false);
+            EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorSxSx);
         }
 
         public static void EmitVectorSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit)
         {
-            EmitSaturatingNarrowOp(Context, Emit, true, false, false);
+            EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorSxZx);
+        }
+
+        public static void EmitVectorSaturatingNarrowOpZxSx(AILEmitterCtx Context, Action Emit)
+        {
+            EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorZxSx);
         }
 
         public static void EmitVectorSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit)
         {
-            EmitSaturatingNarrowOp(Context, Emit, false, false, false);
+            EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorZxZx);
         }
 
-        public static void EmitSaturatingNarrowOp(
-            AILEmitterCtx Context,
-            Action        Emit,
-            bool          SignedSrc,
-            bool          SignedDst,
-            bool          Scalar)
+        public static void EmitSaturatingNarrowOp(AILEmitterCtx Context, Action Emit, SaturatingNarrowFlags Flags)
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            int Elems = !Scalar ? 8 >> Op.Size : 1;
+            bool Scalar    = (Flags & SaturatingNarrowFlags.Scalar)    != 0;
+            bool SignedSrc = (Flags & SaturatingNarrowFlags.SignedSrc) != 0;
+            bool SignedDst = (Flags & SaturatingNarrowFlags.SignedDst) != 0;
 
-            int ESize = 8 << Op.Size;
+            int Elems = !Scalar ? 8 >> Op.Size : 1;
 
             int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0;
 
-            long TMaxValue = SignedDst ? (1 << (ESize - 1)) - 1 : (long)(~0UL >> (64 - ESize));
-            long TMinValue = SignedDst ? -((1 << (ESize - 1))) : 0;
-
-            Context.EmitLdc_I8(0L);
-            Context.EmitSttmp();
+            if (Scalar)
+            {
+                EmitVectorZeroLowerTmp(Context);
+            }
 
             if (Part != 0)
             {
@@ -840,47 +1033,11 @@ namespace ChocolArm64.Instruction
 
             for (int Index = 0; Index < Elems; Index++)
             {
-                AILLabel LblLe    = new AILLabel();
-                AILLabel LblGeEnd = new AILLabel();
-
                 EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc);
 
                 Emit();
 
-                Context.Emit(OpCodes.Dup);
-
-                Context.EmitLdc_I8(TMaxValue);
-
-                Context.Emit(SignedSrc ? OpCodes.Ble_S : OpCodes.Ble_Un_S, LblLe);
-
-                Context.Emit(OpCodes.Pop);
-
-                Context.EmitLdc_I8(TMaxValue);
-                Context.EmitLdc_I8(0x8000000L);
-                Context.EmitSttmp();
-
-                Context.Emit(OpCodes.Br_S, LblGeEnd);
-
-                Context.MarkLabel(LblLe);
-
-                Context.Emit(OpCodes.Dup);
-
-                Context.EmitLdc_I8(TMinValue);
-
-                Context.Emit(SignedSrc ? OpCodes.Bge_S : OpCodes.Bge_Un_S, LblGeEnd);
-
-                Context.Emit(OpCodes.Pop);
-
-                Context.EmitLdc_I8(TMinValue);
-                Context.EmitLdc_I8(0x8000000L);
-                Context.EmitSttmp();
-
-                Context.MarkLabel(LblGeEnd);
-
-                if (Scalar)
-                {
-                    EmitVectorZeroLowerTmp(Context);
-                }
+                EmitSatQ(Context, Op.Size, SignedSrc, SignedDst);
 
                 EmitVectorInsertTmp(Context, Part + Index, Op.Size);
             }
@@ -892,13 +1049,120 @@ namespace ChocolArm64.Instruction
             {
                 EmitVectorZeroUpper(Context, Op.Rd);
             }
+        }
+
+        // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
+        public static void EmitSatQ(
+            AILEmitterCtx Context,
+            int  SizeDst,
+            bool SignedSrc,
+            bool SignedDst)
+        {
+            if (SizeDst > 2)
+            {
+                throw new ArgumentOutOfRangeException(nameof(SizeDst));
+            }
+
+            Context.EmitLdc_I4(SizeDst);
+            Context.EmitLdarg(ATranslatedSub.StateArgIdx);
+
+            if (SignedSrc)
+            {
+                ASoftFallback.EmitCall(Context, SignedDst
+                    ? nameof(ASoftFallback.SignedSrcSignedDstSatQ)
+                    : nameof(ASoftFallback.SignedSrcUnsignedDstSatQ));
+            }
+            else
+            {
+                ASoftFallback.EmitCall(Context, SignedDst
+                    ? nameof(ASoftFallback.UnsignedSrcSignedDstSatQ)
+                    : nameof(ASoftFallback.UnsignedSrcUnsignedDstSatQ));
+            }
+        }
+
+        // TSrc (8bit, 16bit, 32bit, 64bit) == TDst (8bit, 16bit, 32bit, 64bit); signed.
+        public static void EmitUnarySignedSatQAbsOrNeg(AILEmitterCtx Context, int Size)
+        {
+            int ESize = 8 << Size;
+
+            long TMaxValue =  (1L << (ESize - 1)) - 1L;
+            long TMinValue = -(1L << (ESize - 1));
+
+            AILLabel LblFalse = new AILLabel();
+
+            Context.Emit(OpCodes.Dup);
+            Context.Emit(OpCodes.Neg);
+            Context.EmitLdc_I8(TMinValue);
+            Context.Emit(OpCodes.Ceq);
+            Context.Emit(OpCodes.Brfalse_S, LblFalse);
+
+            Context.Emit(OpCodes.Pop);
+
+            EmitSetFpsrQCFlag(Context);
+
+            Context.EmitLdc_I8(TMaxValue);
+
+            Context.MarkLabel(LblFalse);
+        }
+
+        // TSrcs (64bit) == TDst (64bit); signed, unsigned.
+        public static void EmitBinarySatQAdd(AILEmitterCtx Context, bool Signed)
+        {
+            if (((AOpCodeSimdReg)Context.CurrOp).Size < 3)
+            {
+                throw new InvalidOperationException();
+            }
 
             Context.EmitLdarg(ATranslatedSub.StateArgIdx);
+
+            ASoftFallback.EmitCall(Context, Signed
+                ? nameof(ASoftFallback.BinarySignedSatQAdd)
+                : nameof(ASoftFallback.BinaryUnsignedSatQAdd));
+        }
+
+        // TSrcs (64bit) == TDst (64bit); signed, unsigned.
+        public static void EmitBinarySatQSub(AILEmitterCtx Context, bool Signed)
+        {
+            if (((AOpCodeSimdReg)Context.CurrOp).Size < 3)
+            {
+                throw new InvalidOperationException();
+            }
+
+            Context.EmitLdarg(ATranslatedSub.StateArgIdx);
+
+            ASoftFallback.EmitCall(Context, Signed
+                ? nameof(ASoftFallback.BinarySignedSatQSub)
+                : nameof(ASoftFallback.BinaryUnsignedSatQSub));
+        }
+
+        // TSrcs (64bit) == TDst (64bit); signed, unsigned.
+        public static void EmitBinarySatQAccumulate(AILEmitterCtx Context, bool Signed)
+        {
+            if (((AOpCodeSimd)Context.CurrOp).Size < 3)
+            {
+                throw new InvalidOperationException();
+            }
+
+            Context.EmitLdarg(ATranslatedSub.StateArgIdx);
+
+            ASoftFallback.EmitCall(Context, Signed
+                ? nameof(ASoftFallback.BinarySignedSatQAcc)
+                : nameof(ASoftFallback.BinaryUnsignedSatQAcc));
+        }
+
+        public static void EmitSetFpsrQCFlag(AILEmitterCtx Context)
+        {
+            const int QCFlagBit = 27;
+
+            Context.EmitLdarg(ATranslatedSub.StateArgIdx);
+
             Context.EmitLdarg(ATranslatedSub.StateArgIdx);
             Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr));
-            Context.EmitLdtmp();
-            Context.Emit(OpCodes.Conv_I4);
+
+            Context.EmitLdc_I4(1 << QCFlagBit);
+
             Context.Emit(OpCodes.Or);
+
             Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr));
         }
 
diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs
index ae3994b029..a4d12dd613 100644
--- a/ChocolArm64/Instruction/ASoftFallback.cs
+++ b/ChocolArm64/Instruction/ASoftFallback.cs
@@ -1,3 +1,4 @@
+using ChocolArm64.State;
 using ChocolArm64.Translation;
 using System;
 
@@ -10,6 +11,273 @@ namespace ChocolArm64.Instruction
             Context.EmitCall(typeof(ASoftFallback), MthdName);
         }
 
+        public static long BinarySignedSatQAdd(long op1, long op2, AThreadState State)
+        {
+            long Add = op1 + op2;
+
+            if ((~(op1 ^ op2) & (op1 ^ Add)) < 0L)
+            {
+                SetFpsrQCFlag(State);
+
+                if (op1 < 0L)
+                {
+                    return long.MinValue;
+                }
+                else
+                {
+                    return long.MaxValue;
+                }
+            }
+            else
+            {
+                return Add;
+            }
+        }
+
+        public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2, AThreadState State)
+        {
+            ulong Add = op1 + op2;
+
+            if ((Add < op1) && (Add < op2))
+            {
+                SetFpsrQCFlag(State);
+
+                return ulong.MaxValue;
+            }
+            else
+            {
+                return Add;
+            }
+        }
+
+        public static long BinarySignedSatQSub(long op1, long op2, AThreadState State)
+        {
+            long Sub = op1 - op2;
+
+            if (((op1 ^ op2) & (op1 ^ Sub)) < 0L)
+            {
+                SetFpsrQCFlag(State);
+
+                if (op1 < 0L)
+                {
+                    return long.MinValue;
+                }
+                else
+                {
+                    return long.MaxValue;
+                }
+            }
+            else
+            {
+                return Sub;
+            }
+        }
+
+        public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2, AThreadState State)
+        {
+            ulong Sub = op1 - op2;
+
+            if (op1 < op2)
+            {
+                SetFpsrQCFlag(State);
+
+                return ulong.MinValue;
+            }
+            else
+            {
+                return Sub;
+            }
+        }
+
+        public static long BinarySignedSatQAcc(ulong op1, long op2, AThreadState State)
+        {
+            if (op1 <= (ulong)long.MaxValue)
+            {
+                // op1 from ulong.MinValue to (ulong)long.MaxValue
+                // op2 from long.MinValue to long.MaxValue
+
+                long Add = (long)op1 + op2;
+
+                if ((~op2 & Add) < 0L)
+                {
+                    SetFpsrQCFlag(State);
+
+                    return long.MaxValue;
+                }
+                else
+                {
+                    return Add;
+                }
+            }
+            else if (op2 >= 0L)
+            {
+                // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
+                // op2 from (long)ulong.MinValue to long.MaxValue
+
+                SetFpsrQCFlag(State);
+
+                return long.MaxValue;
+            }
+            else
+            {
+                // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
+                // op2 from long.MinValue to (long)ulong.MinValue - 1L
+
+                ulong Add = op1 + (ulong)op2;
+
+                if (Add > (ulong)long.MaxValue)
+                {
+                    SetFpsrQCFlag(State);
+
+                    return long.MaxValue;
+                }
+                else
+                {
+                    return (long)Add;
+                }
+            }
+        }
+
+        public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2, AThreadState State)
+        {
+            if (op1 >= 0L)
+            {
+                // op1 from (long)ulong.MinValue to long.MaxValue
+                // op2 from ulong.MinValue to ulong.MaxValue
+
+                ulong Add = (ulong)op1 + op2;
+
+                if ((Add < (ulong)op1) && (Add < op2))
+                {
+                    SetFpsrQCFlag(State);
+
+                    return ulong.MaxValue;
+                }
+                else
+                {
+                    return Add;
+                }
+            }
+            else if (op2 > (ulong)long.MaxValue)
+            {
+                // op1 from long.MinValue to (long)ulong.MinValue - 1L
+                // op2 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
+
+                return (ulong)op1 + op2;
+            }
+            else
+            {
+                // op1 from long.MinValue to (long)ulong.MinValue - 1L
+                // op2 from ulong.MinValue to (ulong)long.MaxValue
+
+                long Add = op1 + (long)op2;
+
+                if (Add < (long)ulong.MinValue)
+                {
+                    SetFpsrQCFlag(State);
+
+                    return ulong.MinValue;
+                }
+                else
+                {
+                    return (ulong)Add;
+                }
+            }
+        }
+
+        public static long SignedSrcSignedDstSatQ(long op, int Size, AThreadState State)
+        {
+            int ESize = 8 << Size;
+
+            long TMaxValue =  (1L << (ESize - 1)) - 1L;
+            long TMinValue = -(1L << (ESize - 1));
+
+            if (op > TMaxValue)
+            {
+                SetFpsrQCFlag(State);
+
+                return TMaxValue;
+            }
+            else if (op < TMinValue)
+            {
+                SetFpsrQCFlag(State);
+
+                return TMinValue;
+            }
+            else
+            {
+                return op;
+            }
+        }
+
+        public static ulong SignedSrcUnsignedDstSatQ(long op, int Size, AThreadState State)
+        {
+            int ESize = 8 << Size;
+
+            ulong TMaxValue = (1UL << ESize) - 1UL;
+            ulong TMinValue =  0UL;
+
+            if (op > (long)TMaxValue)
+            {
+                SetFpsrQCFlag(State);
+
+                return TMaxValue;
+            }
+            else if (op < (long)TMinValue)
+            {
+                SetFpsrQCFlag(State);
+
+                return TMinValue;
+            }
+            else
+            {
+                return (ulong)op;
+            }
+        }
+
+        public static long UnsignedSrcSignedDstSatQ(ulong op, int Size, AThreadState State)
+        {
+            int ESize = 8 << Size;
+
+            long TMaxValue = (1L << (ESize - 1)) - 1L;
+
+            if (op > (ulong)TMaxValue)
+            {
+                SetFpsrQCFlag(State);
+
+                return TMaxValue;
+            }
+            else
+            {
+                return (long)op;
+            }
+        }
+
+        public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int Size, AThreadState State)
+        {
+            int ESize = 8 << Size;
+
+            ulong TMaxValue = (1UL << ESize) - 1UL;
+
+            if (op > TMaxValue)
+            {
+                SetFpsrQCFlag(State);
+
+                return TMaxValue;
+            }
+            else
+            {
+                return op;
+            }
+        }
+
+        private static void SetFpsrQCFlag(AThreadState State)
+        {
+            const int QCFlagBit = 27;
+
+            State.Fpsr |= 1 << QCFlagBit;
+        }
+
         public static ulong CountLeadingSigns(ulong Value, int Size)
         {
             Value ^= Value >> 1;
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
index 82591edaee..88c5981cd2 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@@ -22,6 +22,17 @@ namespace Ryujinx.Tests.Cpu
         }
 
 #region "ValueSource"
+        private static ulong[] _1B1H1S1D_()
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x000000000000007Ful,
+                                 0x0000000000000080ul, 0x00000000000000FFul,
+                                 0x0000000000007FFFul, 0x0000000000008000ul,
+                                 0x000000000000FFFFul, 0x000000007FFFFFFFul,
+                                 0x0000000080000000ul, 0x00000000FFFFFFFFul,
+                                 0x7FFFFFFFFFFFFFFFul, 0x8000000000000000ul,
+                                 0xFFFFFFFFFFFFFFFFul };
+        }
+
         private static ulong[] _1D_()
         {
             return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
@@ -1126,6 +1137,192 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
+        [Test, Description("SQABS <V><d>, <V><n>")]
+        public void Sqabs_S_B_H_S_D([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong A,
+                                    [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <B, H, S, D>
+        {
+            uint Opcode = 0x5E207800; // SQABS B0, B0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqabs_S(Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Description("SQABS <Vd>.<T>, <Vn>.<T>")]
+        public void Sqabs_V_8B_4H_2S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x0E207800; // SQABS V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqabs_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Description("SQABS <Vd>.<T>, <Vn>.<T>")]
+        public void Sqabs_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                         [Values(1u, 0u)] uint Rn,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x4E207800; // SQABS V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqabs_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Description("SQNEG <V><d>, <V><n>")]
+        public void Sqneg_S_B_H_S_D([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong A,
+                                    [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <B, H, S, D>
+        {
+            uint Opcode = 0x7E207800; // SQNEG B0, B0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqneg_S(Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Description("SQNEG <Vd>.<T>, <Vn>.<T>")]
+        public void Sqneg_V_8B_4H_2S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x2E207800; // SQNEG V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqneg_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Description("SQNEG <Vd>.<T>, <Vn>.<T>")]
+        public void Sqneg_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                         [Values(1u, 0u)] uint Rn,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x6E207800; // SQNEG V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqneg_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
         [Test, Description("SQXTN <Vb><d>, <Va><n>")]
         public void Sqxtn_S_HB_SH_DS([Values(0u)] uint Rd,
                                      [Values(1u, 0u)] uint Rn,
@@ -1138,12 +1335,15 @@ namespace Ryujinx.Tests.Cpu
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
             Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
 
             AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
             SimdFp.Sqxtn_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1151,7 +1351,7 @@ namespace Ryujinx.Tests.Cpu
                 Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
-            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
         }
 
         [Test, Description("SQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
@@ -1166,12 +1366,15 @@ namespace Ryujinx.Tests.Cpu
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
             Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0E1(A, A);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
 
             AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
             SimdFp.Sqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1179,7 +1382,7 @@ namespace Ryujinx.Tests.Cpu
                 Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
-            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
         }
 
         [Test, Description("SQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
@@ -1194,12 +1397,15 @@ namespace Ryujinx.Tests.Cpu
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
             Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0E1(A, A);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
 
             AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
             SimdFp.Sqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1207,7 +1413,7 @@ namespace Ryujinx.Tests.Cpu
                 Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
-            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
         }
 
         [Test, Description("SQXTUN <Vb><d>, <Va><n>")]
@@ -1222,12 +1428,15 @@ namespace Ryujinx.Tests.Cpu
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
             Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
 
             AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
             SimdFp.Sqxtun_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1235,7 +1444,7 @@ namespace Ryujinx.Tests.Cpu
                 Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
-            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
         }
 
         [Test, Description("SQXTUN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
@@ -1250,12 +1459,15 @@ namespace Ryujinx.Tests.Cpu
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
             Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0E1(A, A);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
 
             AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
             SimdFp.Sqxtun_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1263,7 +1475,7 @@ namespace Ryujinx.Tests.Cpu
                 Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
-            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
         }
 
         [Test, Description("SQXTUN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
@@ -1278,12 +1490,15 @@ namespace Ryujinx.Tests.Cpu
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
             Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0E1(A, A);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
 
             AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
             SimdFp.Sqxtun_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1291,7 +1506,100 @@ namespace Ryujinx.Tests.Cpu
                 Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
-            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Description("SUQADD <V><d>, <V><n>")]
+        public void Suqadd_S_B_H_S_D([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong A,
+                                     [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <B, H, S, D>
+        {
+            uint Opcode = 0x5E203800; // SUQADD B0, B0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Suqadd_S(Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Description("SUQADD <Vd>.<T>, <Vn>.<T>")]
+        public void Suqadd_V_8B_4H_2S([Values(0u)] uint Rd,
+                                      [Values(1u, 0u)] uint Rn,
+                                      [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                      [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                      [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x0E203800; // SUQADD V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Suqadd_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Description("SUQADD <Vd>.<T>, <Vn>.<T>")]
+        public void Suqadd_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                          [Values(1u, 0u)] uint Rn,
+                                          [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                          [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                          [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x4E203800; // SUQADD V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Suqadd_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
         }
 
         [Test, Description("UQXTN <Vb><d>, <Va><n>")]
@@ -1306,12 +1614,15 @@ namespace Ryujinx.Tests.Cpu
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
             Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
 
             AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
             SimdFp.Uqxtn_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1319,7 +1630,7 @@ namespace Ryujinx.Tests.Cpu
                 Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
-            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
         }
 
         [Test, Description("UQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
@@ -1334,12 +1645,15 @@ namespace Ryujinx.Tests.Cpu
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
             Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0E1(A, A);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
 
             AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
             SimdFp.Uqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1347,7 +1661,7 @@ namespace Ryujinx.Tests.Cpu
                 Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
-            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
         }
 
         [Test, Description("UQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
@@ -1362,12 +1676,15 @@ namespace Ryujinx.Tests.Cpu
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
             Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0E1(A, A);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
 
             AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
             SimdFp.Uqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1375,7 +1692,100 @@ namespace Ryujinx.Tests.Cpu
                 Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
-            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Description("USQADD <V><d>, <V><n>")]
+        public void Usqadd_S_B_H_S_D([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong A,
+                                     [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <B, H, S, D>
+        {
+            uint Opcode = 0x7E203800; // USQADD B0, B0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Usqadd_S(Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Description("USQADD <Vd>.<T>, <Vn>.<T>")]
+        public void Usqadd_V_8B_4H_2S([Values(0u)] uint Rd,
+                                      [Values(1u, 0u)] uint Rn,
+                                      [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                      [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                      [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x2E203800; // USQADD V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Usqadd_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Description("USQADD <Vd>.<T>, <Vn>.<T>")]
+        public void Usqadd_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                          [Values(1u, 0u)] uint Rn,
+                                          [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                          [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                          [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x6E203800; // USQADD V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Usqadd_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
         }
 
         [Test, Description("XTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
index c67348d1da..e6cfcbde5b 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
@@ -22,6 +22,17 @@ namespace Ryujinx.Tests.Cpu
         }
 
 #region "ValueSource"
+        private static ulong[] _1B1H1S1D_()
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x000000000000007Ful,
+                                 0x0000000000000080ul, 0x00000000000000FFul,
+                                 0x0000000000007FFFul, 0x0000000000008000ul,
+                                 0x000000000000FFFFul, 0x000000007FFFFFFFul,
+                                 0x0000000080000000ul, 0x00000000FFFFFFFFul,
+                                 0x7FFFFFFFFFFFFFFFul, 0x8000000000000000ul,
+                                 0xFFFFFFFFFFFFFFFFul };
+        }
+
         private static ulong[] _1D_()
         {
             return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
@@ -1721,6 +1732,216 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
+        [Test, Pairwise, Description("SQADD <V><d>, <V><n>, <V><m>")]
+        public void Sqadd_S_B_H_S_D([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [Values(2u, 0u)] uint Rm,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <B, H, S, D>
+        {
+            uint Opcode = 0x5E200C00; // SQADD B0, B0, B0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqadd_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Pairwise, Description("SQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Sqadd_V_8B_4H_2S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [Values(2u, 0u)] uint Rm,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x0E200C00; // SQADD V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Pairwise, Description("SQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Sqadd_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                         [Values(1u, 0u)] uint Rn,
+                                         [Values(2u, 0u)] uint Rm,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B,
+                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x4E200C00; // SQADD V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Pairwise, Description("SQSUB <V><d>, <V><n>, <V><m>")]
+        public void Sqsub_S_B_H_S_D([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [Values(2u, 0u)] uint Rm,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <B, H, S, D>
+        {
+            uint Opcode = 0x5E202C00; // SQSUB B0, B0, B0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqsub_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Pairwise, Description("SQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Sqsub_V_8B_4H_2S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [Values(2u, 0u)] uint Rm,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x0E202C00; // SQSUB V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqsub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Pairwise, Description("SQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Sqsub_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                         [Values(1u, 0u)] uint Rn,
+                                         [Values(2u, 0u)] uint Rm,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B,
+                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x4E202C00; // SQSUB V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Sqsub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
         [Test, Pairwise, Description("SSUBW{2} <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb>")]
         public void Ssubw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd,
                                                  [Values(1u, 0u)] uint Rn,
@@ -2370,6 +2591,216 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
+        [Test, Pairwise, Description("UQADD <V><d>, <V><n>, <V><m>")]
+        public void Uqadd_S_B_H_S_D([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [Values(2u, 0u)] uint Rm,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <B, H, S, D>
+        {
+            uint Opcode = 0x7E200C00; // UQADD B0, B0, B0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Uqadd_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Pairwise, Description("UQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Uqadd_V_8B_4H_2S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [Values(2u, 0u)] uint Rm,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x2E200C00; // UQADD V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Uqadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Pairwise, Description("UQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Uqadd_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                         [Values(1u, 0u)] uint Rn,
+                                         [Values(2u, 0u)] uint Rm,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B,
+                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x6E200C00; // UQADD V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Uqadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Pairwise, Description("UQSUB <V><d>, <V><n>, <V><m>")]
+        public void Uqsub_S_B_H_S_D([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [Values(2u, 0u)] uint Rm,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_1B1H1S1D_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <B, H, S, D>
+        {
+            uint Opcode = 0x7E202C00; // UQSUB B0, B0, B0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Uqsub_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Pairwise, Description("UQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Uqsub_V_8B_4H_2S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [Values(2u, 0u)] uint Rm,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x2E202C00; // UQSUB V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Uqsub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
+        [Test, Pairwise, Description("UQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Uqsub_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                         [Values(1u, 0u)] uint Rn,
+                                         [Values(2u, 0u)] uint Rm,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B,
+                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x6E202C00; // UQSUB V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            int Fpsr = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, Fpsr: Fpsr);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            Shared.FPSR = new Bits((uint)Fpsr);
+            SimdFp.Uqsub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+            Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32()));
+        }
+
         [Test, Pairwise, Description("USUBW{2} <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb>")]
         public void Usubw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd,
                                                  [Values(1u, 0u)] uint Rn,
diff --git a/Ryujinx.Tests/Cpu/Tester/Instructions.cs b/Ryujinx.Tests/Cpu/Tester/Instructions.cs
index 2f52dcbf55..8b1b010f65 100644
--- a/Ryujinx.Tests/Cpu/Tester/Instructions.cs
+++ b/Ryujinx.Tests/Cpu/Tester/Instructions.cs
@@ -3060,6 +3060,210 @@ namespace Ryujinx.Tests.Cpu.Tester
             V(d, result);
         }
 
+        // sqabs_advsimd.html#SQABS_asisdmisc_R
+        public static void Sqabs_S(Bits size, Bits Rn, Bits Rd)
+        {
+            const bool U = false;
+
+            /* Decode Scalar */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = esize;
+            int elements = 1;
+
+            bool neg = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+            BigInteger element;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element = SInt(Elem(operand, e, esize));
+
+                if (neg)
+                {
+                    element = -element;
+                }
+                else
+                {
+                    element = Abs(element);
+                }
+
+                (Bits _result, bool _sat) = SignedSatQ(element, esize);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
+        // sqabs_advsimd.html#SQABS_asimdmisc_R
+        public static void Sqabs_V(bool Q, Bits size, Bits Rn, Bits Rd)
+        {
+            const bool U = false;
+
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+
+            /* if size:Q == '110' then ReservedValue(); */
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = (Q ? 128 : 64);
+            int elements = datasize / esize;
+
+            bool neg = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+            BigInteger element;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element = SInt(Elem(operand, e, esize));
+
+                if (neg)
+                {
+                    element = -element;
+                }
+                else
+                {
+                    element = Abs(element);
+                }
+
+                (Bits _result, bool _sat) = SignedSatQ(element, esize);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
+        // sqneg_advsimd.html#SQNEG_asisdmisc_R
+        public static void Sqneg_S(Bits size, Bits Rn, Bits Rd)
+        {
+            const bool U = true;
+
+            /* Decode Scalar */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = esize;
+            int elements = 1;
+
+            bool neg = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+            BigInteger element;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element = SInt(Elem(operand, e, esize));
+
+                if (neg)
+                {
+                    element = -element;
+                }
+                else
+                {
+                    element = Abs(element);
+                }
+
+                (Bits _result, bool _sat) = SignedSatQ(element, esize);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
+        // sqneg_advsimd.html#SQNEG_asimdmisc_R
+        public static void Sqneg_V(bool Q, Bits size, Bits Rn, Bits Rd)
+        {
+            const bool U = true;
+
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+
+            /* if size:Q == '110' then ReservedValue(); */
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = (Q ? 128 : 64);
+            int elements = datasize / esize;
+
+            bool neg = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+            BigInteger element;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element = SInt(Elem(operand, e, esize));
+
+                if (neg)
+                {
+                    element = -element;
+                }
+                else
+                {
+                    element = Abs(element);
+                }
+
+                (Bits _result, bool _sat) = SignedSatQ(element, esize);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
         // sqxtn_advsimd.html#SQXTN_asisdmisc_N
         public static void Sqxtn_S(Bits size, Bits Rn, Bits Rd)
         {
@@ -3228,6 +3432,96 @@ namespace Ryujinx.Tests.Cpu.Tester
             Vpart(d, part, result);
         }
 
+        // suqadd_advsimd.html#SUQADD_asisdmisc_R
+        public static void Suqadd_S(Bits size, Bits Rn, Bits Rd)
+        {
+            const bool U = false;
+
+            /* Decode Scalar */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = esize;
+            int elements = 1;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+            Bits operand2 = V(datasize, d);
+            BigInteger op1;
+            BigInteger op2;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                op1 = Int(Elem(operand, e, esize), !unsigned);
+                op2 = Int(Elem(operand2, e, esize), unsigned);
+
+                (Bits _result, bool _sat) = SatQ(op1 + op2, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
+        // suqadd_advsimd.html#SUQADD_asimdmisc_R
+        public static void Suqadd_V(bool Q, Bits size, Bits Rn, Bits Rd)
+        {
+            const bool U = false;
+
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+
+            /* if size:Q == '110' then ReservedValue(); */
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = (Q ? 128 : 64);
+            int elements = datasize / esize;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+            Bits operand2 = V(datasize, d);
+            BigInteger op1;
+            BigInteger op2;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                op1 = Int(Elem(operand, e, esize), !unsigned);
+                op2 = Int(Elem(operand2, e, esize), unsigned);
+
+                (Bits _result, bool _sat) = SatQ(op1 + op2, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
         // uqxtn_advsimd.html#UQXTN_asisdmisc_N
         public static void Uqxtn_S(Bits size, Bits Rn, Bits Rd)
         {
@@ -3316,6 +3610,96 @@ namespace Ryujinx.Tests.Cpu.Tester
             Vpart(d, part, result);
         }
 
+        // usqadd_advsimd.html#USQADD_asisdmisc_R
+        public static void Usqadd_S(Bits size, Bits Rn, Bits Rd)
+        {
+            const bool U = true;
+
+            /* Decode Scalar */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = esize;
+            int elements = 1;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+            Bits operand2 = V(datasize, d);
+            BigInteger op1;
+            BigInteger op2;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                op1 = Int(Elem(operand, e, esize), !unsigned);
+                op2 = Int(Elem(operand2, e, esize), unsigned);
+
+                (Bits _result, bool _sat) = SatQ(op1 + op2, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
+        // usqadd_advsimd.html#USQADD_asimdmisc_R
+        public static void Usqadd_V(bool Q, Bits size, Bits Rn, Bits Rd)
+        {
+            const bool U = true;
+
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+
+            /* if size:Q == '110' then ReservedValue(); */
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = (Q ? 128 : 64);
+            int elements = datasize / esize;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+            Bits operand2 = V(datasize, d);
+            BigInteger op1;
+            BigInteger op2;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                op1 = Int(Elem(operand, e, esize), !unsigned);
+                op2 = Int(Elem(operand2, e, esize), unsigned);
+
+                (Bits _result, bool _sat) = SatQ(op1 + op2, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
         // xtn_advsimd.html
         public static void Xtn_V(bool Q, Bits size, Bits Rn, Bits Rd)
         {
@@ -4593,6 +4977,202 @@ namespace Ryujinx.Tests.Cpu.Tester
             V(d, result);
         }
 
+        // sqadd_advsimd.html#SQADD_asisdsame_only
+        public static void Sqadd_S(Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool U = false;
+
+            /* Decode Scalar */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = esize;
+            int elements = 1;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n);
+            Bits operand2 = V(datasize, m);
+            BigInteger element1;
+            BigInteger element2;
+            BigInteger sum;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element1 = Int(Elem(operand1, e, esize), unsigned);
+                element2 = Int(Elem(operand2, e, esize), unsigned);
+
+                sum = element1 + element2;
+
+                (Bits _result, bool _sat) = SatQ(sum, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
+        // sqadd_advsimd.html#SQADD_asimdsame_only
+        public static void Sqadd_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool U = false;
+
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            /* if size:Q == '110' then ReservedValue(); */
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = (Q ? 128 : 64);
+            int elements = datasize / esize;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n);
+            Bits operand2 = V(datasize, m);
+            BigInteger element1;
+            BigInteger element2;
+            BigInteger sum;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element1 = Int(Elem(operand1, e, esize), unsigned);
+                element2 = Int(Elem(operand2, e, esize), unsigned);
+
+                sum = element1 + element2;
+
+                (Bits _result, bool _sat) = SatQ(sum, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
+        // sqsub_advsimd.html#SQSUB_asisdsame_only
+        public static void Sqsub_S(Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool U = false;
+
+            /* Decode Scalar */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = esize;
+            int elements = 1;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n);
+            Bits operand2 = V(datasize, m);
+            BigInteger element1;
+            BigInteger element2;
+            BigInteger diff;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element1 = Int(Elem(operand1, e, esize), unsigned);
+                element2 = Int(Elem(operand2, e, esize), unsigned);
+
+                diff = element1 - element2;
+
+                (Bits _result, bool _sat) = SatQ(diff, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
+        // sqsub_advsimd.html#SQSUB_asimdsame_only
+        public static void Sqsub_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool U = false;
+
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            /* if size:Q == '110' then ReservedValue(); */
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = (Q ? 128 : 64);
+            int elements = datasize / esize;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n);
+            Bits operand2 = V(datasize, m);
+            BigInteger element1;
+            BigInteger element2;
+            BigInteger diff;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element1 = Int(Elem(operand1, e, esize), unsigned);
+                element2 = Int(Elem(operand2, e, esize), unsigned);
+
+                diff = element1 - element2;
+
+                (Bits _result, bool _sat) = SatQ(diff, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
         // ssubw_advsimd.html
         public static void Ssubw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
         {
@@ -5085,6 +5665,202 @@ namespace Ryujinx.Tests.Cpu.Tester
             V(d, result);
         }
 
+        // uqadd_advsimd.html#UQADD_asisdsame_only
+        public static void Uqadd_S(Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool U = true;
+
+            /* Decode Scalar */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = esize;
+            int elements = 1;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n);
+            Bits operand2 = V(datasize, m);
+            BigInteger element1;
+            BigInteger element2;
+            BigInteger sum;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element1 = Int(Elem(operand1, e, esize), unsigned);
+                element2 = Int(Elem(operand2, e, esize), unsigned);
+
+                sum = element1 + element2;
+
+                (Bits _result, bool _sat) = SatQ(sum, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
+        // uqadd_advsimd.html#UQADD_asimdsame_only
+        public static void Uqadd_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool U = true;
+
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            /* if size:Q == '110' then ReservedValue(); */
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = (Q ? 128 : 64);
+            int elements = datasize / esize;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n);
+            Bits operand2 = V(datasize, m);
+            BigInteger element1;
+            BigInteger element2;
+            BigInteger sum;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element1 = Int(Elem(operand1, e, esize), unsigned);
+                element2 = Int(Elem(operand2, e, esize), unsigned);
+
+                sum = element1 + element2;
+
+                (Bits _result, bool _sat) = SatQ(sum, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
+        // uqsub_advsimd.html#UQSUB_asisdsame_only
+        public static void Uqsub_S(Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool U = true;
+
+            /* Decode Scalar */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = esize;
+            int elements = 1;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n);
+            Bits operand2 = V(datasize, m);
+            BigInteger element1;
+            BigInteger element2;
+            BigInteger diff;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element1 = Int(Elem(operand1, e, esize), unsigned);
+                element2 = Int(Elem(operand2, e, esize), unsigned);
+
+                diff = element1 - element2;
+
+                (Bits _result, bool _sat) = SatQ(diff, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
+        // uqsub_advsimd.html#UQSUB_asimdsame_only
+        public static void Uqsub_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool U = true;
+
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            /* if size:Q == '110' then ReservedValue(); */
+
+            int esize = 8 << (int)UInt(size);
+            int datasize = (Q ? 128 : 64);
+            int elements = datasize / esize;
+
+            bool unsigned = (U == true);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n);
+            Bits operand2 = V(datasize, m);
+            BigInteger element1;
+            BigInteger element2;
+            BigInteger diff;
+            bool sat;
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element1 = Int(Elem(operand1, e, esize), unsigned);
+                element2 = Int(Elem(operand2, e, esize), unsigned);
+
+                diff = element1 - element2;
+
+                (Bits _result, bool _sat) = SatQ(diff, esize, unsigned);
+                Elem(result, e, esize, _result);
+                sat = _sat;
+
+                if (sat)
+                {
+                    /* FPSR.QC = '1'; */
+                    FPSR[27] = true; // TODO: Add named fields.
+                }
+            }
+
+            V(d, result);
+        }
+
         // usubw_advsimd.html
         public static void Usubw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
         {
diff --git a/Ryujinx.Tests/Cpu/Tester/Pseudocode.cs b/Ryujinx.Tests/Cpu/Tester/Pseudocode.cs
index 3a877fb1a2..6c4dfa92ba 100644
--- a/Ryujinx.Tests/Cpu/Tester/Pseudocode.cs
+++ b/Ryujinx.Tests/Cpu/Tester/Pseudocode.cs
@@ -1193,9 +1193,9 @@ namespace Ryujinx.Tests.Cpu.Tester
                 result = BigInteger.Pow(2, N) - 1;
                 saturated = true;
             }
-            else if (i < 0)
+            else if (i < (BigInteger)0)
             {
-                result = 0;
+                result = (BigInteger)0;
                 saturated = true;
             }
             else