From 063fae50fe25388d10e9ec1915c561dc0f4d519d Mon Sep 17 00:00:00 2001
From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
Date: Sun, 15 Jul 2018 05:53:26 +0200
Subject: [PATCH] Fix EmitHighNarrow(), EmitSaturatingNarrowOp() when Rd == Rn
 || Rd == Rm (and Part != 0). Optimize the EmitVectorTranspose(),
 EmitVectorUnzip(), EmitVectorZip() algorithms (fewer and simpler
 operations). Add 12 tests for the Trn1/2, Uzp1/2, Zip1/2 (V) instructions.
 (#268)

* Update CpuTestSimdArithmetic.cs

* Update CpuTestSimd.cs

* Update CpuTestSimdReg.cs

* Update Instructions.cs

* Update AInstEmitSimdArithmetic.cs

* Update AInstEmitSimdHelper.cs

* Update AInstEmitSimdMove.cs

* Delete CpuTestSimdMove.cs
---
 .../Instruction/AInstEmitSimdArithmetic.cs    |   12 +-
 .../Instruction/AInstEmitSimdHelper.cs        |   12 +-
 ChocolArm64/Instruction/AInstEmitSimdMove.cs  |   47 +-
 Ryujinx.Tests/Cpu/CpuTestSimd.cs              |  913 +++---
 Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs    |   41 +-
 Ryujinx.Tests/Cpu/CpuTestSimdMove.cs          |  136 -
 Ryujinx.Tests/Cpu/CpuTestSimdReg.cs           | 2532 ++++++++++-------
 Ryujinx.Tests/Cpu/Tester/Instructions.cs      |  208 ++
 8 files changed, 2385 insertions(+), 1516 deletions(-)
 delete mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdMove.cs

diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index a39ffc093e..36bb1cbf16 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -163,12 +163,19 @@ namespace ChocolArm64.Instruction
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
             int Elems = 8 >> Op.Size;
+
             int ESize = 8 << Op.Size;
 
             int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
 
             long RoundConst = 1L << (ESize - 1);
 
+            if (Part != 0)
+            {
+                Context.EmitLdvec(Op.Rd);
+                Context.EmitStvectmp();
+            }
+
             for (int Index = 0; Index < Elems; Index++)
             {
                 EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1);
@@ -185,9 +192,12 @@ namespace ChocolArm64.Instruction
 
                 Context.EmitLsr(ESize);
 
-                EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
+                EmitVectorInsertTmp(Context, Part + Index, Op.Size);
             }
 
+            Context.EmitLdvectmp();
+            Context.EmitStvec(Op.Rd);
+
             if (Part == 0)
             {
                 EmitVectorZeroUpper(Context, Op.Rd);
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
index 1f7a2dad13..7716e2987a 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@@ -813,6 +813,7 @@ namespace ChocolArm64.Instruction
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
             int Elems = !Scalar ? 8 >> Op.Size : 1;
+
             int ESize = 8 << Op.Size;
 
             int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0;
@@ -823,6 +824,12 @@ namespace ChocolArm64.Instruction
             Context.EmitLdc_I8(0L);
             Context.EmitSttmp();
 
+            if (Part != 0)
+            {
+                Context.EmitLdvec(Op.Rd);
+                Context.EmitStvectmp();
+            }
+
             for (int Index = 0; Index < Elems; Index++)
             {
                 AILLabel LblLe    = new AILLabel();
@@ -867,9 +874,12 @@ namespace ChocolArm64.Instruction
                     EmitVectorZeroLower(Context, Op.Rd);
                 }
 
-                EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
+                EmitVectorInsertTmp(Context, Part + Index, Op.Size);
             }
 
+            Context.EmitLdvectmp();
+            Context.EmitStvec(Op.Rd);
+
             if (Part == 0)
             {
                 EmitVectorZeroUpper(Context, Op.Rd);
diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs
index 739f01c62b..592cab733e 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs
@@ -331,17 +331,18 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            int Bytes = Op.GetBitsCount() >> 3;
+            int Words = Op.GetBitsCount() >> 4;
+            int Pairs = Words >> Op.Size;
 
-            int Elems = Bytes >> Op.Size;
-
-            for (int Index = 0; Index < Elems; Index++)
+            for (int Index = 0; Index < Pairs; Index++)
             {
-                int Elem = (Index & ~1) + Part;
+                int Idx = Index << 1;
 
-                EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size);
+                EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size);
+                EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size);
 
-                EmitVectorInsertTmp(Context, Index, Op.Size);
+                EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
+                EmitVectorInsertTmp(Context, Idx    , Op.Size);
             }
 
             Context.EmitLdvectmp();
@@ -357,18 +358,18 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            int Bytes = Op.GetBitsCount() >> 3;
+            int Words = Op.GetBitsCount() >> 4;
+            int Pairs = Words >> Op.Size;
 
-            int Elems = Bytes >> Op.Size;
-            int Half  = Elems >> 1;
-
-            for (int Index = 0; Index < Elems; Index++)
+            for (int Index = 0; Index < Pairs; Index++)
             {
-                int Elem = Part + ((Index & (Half - 1)) << 1);
+                int Idx = Index << 1;
 
-                EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem, Op.Size);
+                EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size);
+                EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size);
 
-                EmitVectorInsertTmp(Context, Index, Op.Size);
+                EmitVectorInsertTmp(Context, Pairs + Index, Op.Size);
+                EmitVectorInsertTmp(Context,         Index, Op.Size);
             }
 
             Context.EmitLdvectmp();
@@ -384,18 +385,20 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            int Bytes = Op.GetBitsCount() >> 3;
+            int Words = Op.GetBitsCount() >> 4;
+            int Pairs = Words >> Op.Size;
 
-            int Elems = Bytes >> Op.Size;
-            int Half  = Elems >> 1;
+            int Base = Part != 0 ? Pairs : 0;
 
-            for (int Index = 0; Index < Elems; Index++)
+            for (int Index = 0; Index < Pairs; Index++)
             {
-                int Elem = Part * Half + (Index >> 1);
+                int Idx = Index << 1;
 
-                EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size);
+                EmitVectorExtractZx(Context, Op.Rn, Base + Index, Op.Size);
+                EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size);
 
-                EmitVectorInsertTmp(Context, Index, Op.Size);
+                EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
+                EmitVectorInsertTmp(Context, Idx    , Op.Size);
             }
 
             Context.EmitLdvectmp();
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
index 02c5b25b24..b84d29575d 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@@ -76,64 +76,53 @@ namespace Ryujinx.Tests.Cpu
         }
 #endregion
 
+        private const int RndCnt = 1;
+
         [Test, Description("ABS <V><d>, <V><n>")]
-        public void Abs_S_D([ValueSource("_1D_")] [Random(1)] ulong A)
+        public void Abs_S_D([Values(0u)] uint Rd,
+                            [Values(1u, 0u)] uint Rn,
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong Z,
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong A)
         {
-            uint Opcode = 0x5EE0B820; // ABS D0, D1
+            uint Opcode = 0x5EE0B800; // ABS D0, D0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Abs_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Description("ABS <Vd>.<T>, <Vn>.<T>")]
-        public void Abs_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
+        public void Abs_V_8B_4H_2S([Values(0u)] uint Rd,
+                                   [Values(1u, 0u)] uint Rn,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x0E20B820; // ABS V0.8B, V1.8B
+            uint Opcode = 0x0E20B800; // ABS V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Abs_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
-            });
-        }
-
-        [Test, Pairwise, Description("ABS <Vd>.<T>, <Vn>.<T>")]
-        public void Abs_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                       [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
-                                       [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
-        {
-            uint Opcode = 0x4E20B820; // ABS V0.16B, V1.16B
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
-
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            SimdFp.Abs_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
-
             Assert.Multiple(() =>
             {
                 Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
@@ -141,114 +130,157 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Pairwise, Description("ADDP <V><d>, <Vn>.<T>")]
-        public void Addp_S_2DD([ValueSource("_1D_")] [Random(1)] ulong A0,
-                               [ValueSource("_1D_")] [Random(1)] ulong A1)
+        [Test, Description("ABS <Vd>.<T>, <Vn>.<T>")]
+        public void Abs_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                       [Values(1u, 0u)] uint Rn,
+                                       [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                       [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                       [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x5EF1B820; // ADDP D0, V1.2D
+            uint Opcode = 0x4E20B800; // ABS V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            SimdFp.Abs_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Description("ADDP <V><d>, <Vn>.<T>")]
+        public void Addp_S_2DD([Values(0u)] uint Rd,
+                               [Values(1u, 0u)] uint Rn,
+                               [ValueSource("_1D_")] [Random(RndCnt)] ulong Z,
+                               [ValueSource("_1D_")] [Random(RndCnt)] ulong A)
+        {
+            uint Opcode = 0x5EF1B800; // ADDP D0, V0.2D
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Addp_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Description("ADDV <V><d>, <Vn>.<T>")]
-        public void Addv_V_8BB_4HH([ValueSource("_8B4H_")] [Random(1)] ulong A,
-                                   [Values(0b00u, 0b01u)] uint size) // <8B, 4H>
+        public void Addv_V_8BB_4HH([Values(0u)] uint Rd,
+                                   [Values(1u, 0u)] uint Rn,
+                                   [ValueSource("_8B4H_")] [Random(RndCnt)] ulong Z,
+                                   [ValueSource("_8B4H_")] [Random(RndCnt)] ulong A,
+                                   [Values(0b00u, 0b01u)] uint size) // <8BB, 4HH>
         {
-            uint Opcode = 0x0E31B820; // ADDV B0, V1.8B
+            uint Opcode = 0x0E31B800; // ADDV B0, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong()));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Addv_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
-        [Test, Pairwise, Description("ADDV <V><d>, <Vn>.<T>")]
-        public void Addv_V_16BB_8HH_4SS([ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
-                                        [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
+        [Test, Description("ADDV <V><d>, <Vn>.<T>")]
+        public void Addv_V_16BB_8HH_4SS([Values(0u)] uint Rd,
+                                        [Values(1u, 0u)] uint Rn,
+                                        [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                        [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                        [Values(0b00u, 0b01u, 0b10u)] uint size) // <16BB, 8HH, 4SS>
         {
-            uint Opcode = 0x4E31B820; // ADDV B0, V1.16B
+            uint Opcode = 0x4E31B800; // ADDV B0, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong()));
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Addv_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Description("CLS <Vd>.<T>, <Vn>.<T>")]
-        public void Cls_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
+        public void Cls_V_8B_4H_2S([Values(0u)] uint Rd,
+                                   [Values(1u, 0u)] uint Rn,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x0E204820; // CLS V0.8B, V1.8B
+            uint Opcode = 0x0E204800; // CLS V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Cls_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
-        [Test, Pairwise, Description("CLS <Vd>.<T>, <Vn>.<T>")]
-        public void Cls_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
+        [Test, Description("CLS <Vd>.<T>, <Vn>.<T>")]
+        public void Cls_V_16B_8H_4S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
         {
-            uint Opcode = 0x4E204820; // CLS V0.16B, V1.16B
+            uint Opcode = 0x4E204800; // CLS V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Cls_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -259,41 +291,50 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("CLZ <Vd>.<T>, <Vn>.<T>")]
-        public void Clz_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
+        public void Clz_V_8B_4H_2S([Values(0u)] uint Rd,
+                                   [Values(1u, 0u)] uint Rn,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x2E204820; // CLZ V0.8B, V1.8B
+            uint Opcode = 0x2E204800; // CLZ V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Clz_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
-        [Test, Pairwise, Description("CLZ <Vd>.<T>, <Vn>.<T>")]
-        public void Clz_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
+        [Test, Description("CLZ <Vd>.<T>, <Vn>.<T>")]
+        public void Clz_V_16B_8H_4S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
         {
-            uint Opcode = 0x6E204820; // CLZ V0.16B, V1.16B
+            uint Opcode = 0x6E204800; // CLZ V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Clz_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -304,61 +345,75 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("CMEQ <V><d>, <V><n>, #0")]
-        public void Cmeq_S_D([ValueSource("_1D_")] [Random(1)] ulong A)
+        public void Cmeq_S_D([Values(0u)] uint Rd,
+                             [Values(1u, 0u)] uint Rn,
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong Z,
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong A)
         {
-            uint Opcode = 0x5EE09820; // CMEQ D0, D1, #0
+            uint Opcode = 0x5EE09800; // CMEQ D0, D0, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Cmeq_Zero_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Description("CMEQ <Vd>.<T>, <Vn>.<T>, #0")]
-        public void Cmeq_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
+        public void Cmeq_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x0E209820; // CMEQ V0.8B, V1.8B, #0
+            uint Opcode = 0x0E209800; // CMEQ V0.8B, V0.8B, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Cmeq_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
-        [Test, Pairwise, Description("CMEQ <Vd>.<T>, <Vn>.<T>, #0")]
-        public void Cmeq_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("CMEQ <Vd>.<T>, <Vn>.<T>, #0")]
+        public void Cmeq_V_16B_8H_4S_2D([Values(0u)] uint Rd, // destination register index (always 0)
+                                        [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // value loaded into V1
                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x4E209820; // CMEQ V0.16B, V1.16B, #0
+            uint Opcode = 0x4E209800; // CMEQ V0.16B, V0.16B, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // V1 carries A as both E0 and E1
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); // hand both preset registers to SingleOpcode
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same A for register 1 on the AArch64 side
             SimdFp.Cmeq_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -369,61 +424,75 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("CMGE <V><d>, <V><n>, #0")]
-        public void Cmge_S_D([ValueSource("_1D_")] [Random(1)] ulong A)
+        public void Cmge_S_D([Values(0u)] uint Rd, // destination register index (always 0)
+                             [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong A) // value loaded into V1
         {
-            uint Opcode = 0x7EE08820; // CMGE D0, D1, #0
+            uint Opcode = 0x7EE08800; // CMGE D0, D0, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Cmge_Zero_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
         [Test, Description("CMGE <Vd>.<T>, <Vn>.<T>, #0")]
-        public void Cmge_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
+        public void Cmge_V_8B_4H_2S([Values(0u)] uint Rd, // destination register index (always 0)
+                                    [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, // value loaded into V1
                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x2E208820; // CMGE V0.8B, V1.8B, #0
+            uint Opcode = 0x2E208800; // CMGE V0.8B, V0.8B, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Cmge_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
-        [Test, Pairwise, Description("CMGE <Vd>.<T>, <Vn>.<T>, #0")]
-        public void Cmge_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("CMGE <Vd>.<T>, <Vn>.<T>, #0")]
+        public void Cmge_V_16B_8H_4S_2D([Values(0u)] uint Rd, // destination register index (always 0)
+                                        [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // value loaded into V1
                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x6E208820; // CMGE V0.16B, V1.16B, #0
+            uint Opcode = 0x6E208800; // CMGE V0.16B, V0.16B, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // V1 carries A as both E0 and E1
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); // hand both preset registers to SingleOpcode
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same A for register 1 on the AArch64 side
             SimdFp.Cmge_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -434,61 +503,75 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("CMGT <V><d>, <V><n>, #0")]
-        public void Cmgt_S_D([ValueSource("_1D_")] [Random(1)] ulong A)
+        public void Cmgt_S_D([Values(0u)] uint Rd, // destination register index (always 0)
+                             [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong A) // value loaded into V1
         {
-            uint Opcode = 0x5EE08820; // CMGT D0, D1, #0
+            uint Opcode = 0x5EE08800; // CMGT D0, D0, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Cmgt_Zero_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
         [Test, Description("CMGT <Vd>.<T>, <Vn>.<T>, #0")]
-        public void Cmgt_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
+        public void Cmgt_V_8B_4H_2S([Values(0u)] uint Rd, // destination register index (always 0)
+                                    [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, // value loaded into V1
                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x0E208820; // CMGT V0.8B, V1.8B, #0
+            uint Opcode = 0x0E208800; // CMGT V0.8B, V0.8B, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Cmgt_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
-        [Test, Pairwise, Description("CMGT <Vd>.<T>, <Vn>.<T>, #0")]
-        public void Cmgt_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("CMGT <Vd>.<T>, <Vn>.<T>, #0")]
+        public void Cmgt_V_16B_8H_4S_2D([Values(0u)] uint Rd, // destination register index (always 0)
+                                        [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // value loaded into V1
                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x4E208820; // CMGT V0.16B, V1.16B, #0
+            uint Opcode = 0x4E208800; // CMGT V0.16B, V0.16B, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // V1 carries A as both E0 and E1
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); // hand both preset registers to SingleOpcode
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same A for register 1 on the AArch64 side
             SimdFp.Cmgt_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -499,61 +582,75 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("CMLE <V><d>, <V><n>, #0")]
-        public void Cmle_S_D([ValueSource("_1D_")] [Random(1)] ulong A)
+        public void Cmle_S_D([Values(0u)] uint Rd, // destination register index (always 0)
+                             [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong A) // value loaded into V1
         {
-            uint Opcode = 0x7EE09820; // CMLE D0, D1, #0
+            uint Opcode = 0x7EE09800; // CMLE D0, D0, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Cmle_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
         [Test, Description("CMLE <Vd>.<T>, <Vn>.<T>, #0")]
-        public void Cmle_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
+        public void Cmle_V_8B_4H_2S([Values(0u)] uint Rd, // destination register index (always 0)
+                                    [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, // value loaded into V1
                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x2E209820; // CMLE V0.8B, V1.8B, #0
+            uint Opcode = 0x2E209800; // CMLE V0.8B, V0.8B, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Cmle_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
-        [Test, Pairwise, Description("CMLE <Vd>.<T>, <Vn>.<T>, #0")]
-        public void Cmle_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("CMLE <Vd>.<T>, <Vn>.<T>, #0")]
+        public void Cmle_V_16B_8H_4S_2D([Values(0u)] uint Rd, // destination register index (always 0)
+                                        [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // value loaded into V1
                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x6E209820; // CMLE V0.16B, V1.16B, #0
+            uint Opcode = 0x6E209800; // CMLE V0.16B, V0.16B, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // V1 carries A as both E0 and E1
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); // hand both preset registers to SingleOpcode
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same A for register 1 on the AArch64 side
             SimdFp.Cmle_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -564,61 +661,75 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("CMLT <V><d>, <V><n>, #0")]
-        public void Cmlt_S_D([ValueSource("_1D_")] [Random(1)] ulong A)
+        public void Cmlt_S_D([Values(0u)] uint Rd, // destination register index (always 0)
+                             [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong A) // value loaded into V1
         {
-            uint Opcode = 0x5EE0A820; // CMLT D0, D1, #0
+            uint Opcode = 0x5EE0A800; // CMLT D0, D0, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Cmlt_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
         [Test, Description("CMLT <Vd>.<T>, <Vn>.<T>, #0")]
-        public void Cmlt_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
+        public void Cmlt_V_8B_4H_2S([Values(0u)] uint Rd, // destination register index (always 0)
+                                    [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, // value loaded into V1
                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x0E20A820; // CMLT V0.8B, V1.8B, #0
+            uint Opcode = 0x0E20A800; // CMLT V0.8B, V0.8B, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Cmlt_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
-        [Test, Pairwise, Description("CMLT <Vd>.<T>, <Vn>.<T>, #0")]
-        public void Cmlt_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("CMLT <Vd>.<T>, <Vn>.<T>, #0")]
+        public void Cmlt_V_16B_8H_4S_2D([Values(0u)] uint Rd, // destination register index (always 0)
+                                        [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // value loaded into V1
                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x4E20A820; // CMLT V0.16B, V1.16B, #0
+            uint Opcode = 0x4E20A800; // CMLT V0.16B, V0.16B, #0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // V1 carries A as both E0 and E1
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); // hand both preset registers to SingleOpcode
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same A for register 1 on the AArch64 side
             SimdFp.Cmlt_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -629,37 +740,46 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("CNT <Vd>.<T>, <Vn>.<T>")]
-        public void Cnt_V_8B([ValueSource("_8B_")] [Random(1)] ulong A)
+        public void Cnt_V_8B([Values(0u)] uint Rd, // destination register index (always 0)
+                             [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong A) // value loaded into V1
         {
-            uint Opcode = 0x0E205820; // CNT V0.8B, V1.8B
+            uint Opcode = 0x0E205800; // CNT V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Cnt_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
-        [Test, Pairwise, Description("CNT <Vd>.<T>, <Vn>.<T>")]
-        public void Cnt_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0,
-                              [ValueSource("_8B_")] [Random(1)] ulong A1)
+        [Test, Description("CNT <Vd>.<T>, <Vn>.<T>")]
+        public void Cnt_V_16B([Values(0u)] uint Rd, // destination register index (always 0)
+                              [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong A) // value loaded into V1
         {
-            uint Opcode = 0x4E205820; // CNT V0.16B, V1.16B
+            uint Opcode = 0x4E205800; // CNT V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // V1 carries A as both E0 and E1
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); // hand both preset registers to SingleOpcode
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same A for register 1 on the AArch64 side
             SimdFp.Cnt_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -670,61 +790,75 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("NEG <V><d>, <V><n>")]
-        public void Neg_S_D([ValueSource("_1D_")] [Random(1)] ulong A)
+        public void Neg_S_D([Values(0u)] uint Rd, // destination register index (always 0)
+                            [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong A) // value loaded into V1
         {
-            uint Opcode = 0x7EE0B820; // NEG D0, D1
+            uint Opcode = 0x7EE0B800; // NEG D0, D0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Neg_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
         [Test, Description("NEG <Vd>.<T>, <Vn>.<T>")]
-        public void Neg_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
+        public void Neg_V_8B_4H_2S([Values(0u)] uint Rd, // destination register index (always 0)
+                                   [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, // value loaded into V1
                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x2E20B820; // NEG V0.8B, V1.8B
+            uint Opcode = 0x2E20B800; // NEG V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Neg_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
-        [Test, Pairwise, Description("NEG <Vd>.<T>, <Vn>.<T>")]
-        public void Neg_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                       [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("NEG <Vd>.<T>, <Vn>.<T>")]
+        public void Neg_V_16B_8H_4S_2D([Values(0u)] uint Rd, // destination register index (always 0)
+                                       [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                                       [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                                       [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // value loaded into V1
                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x6E20B820; // NEG V0.16B, V1.16B
+            uint Opcode = 0x6E20B800; // NEG V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // V1 carries A as both E0 and E1
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); // hand both preset registers to SingleOpcode
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same A for register 1 on the AArch64 side
             SimdFp.Neg_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -735,37 +869,46 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("NOT <Vd>.<T>, <Vn>.<T>")]
-        public void Not_V_8B([ValueSource("_8B_")] [Random(1)] ulong A)
+        public void Not_V_8B([Values(0u)] uint Rd, // destination register index (always 0)
+                             [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong A) // value loaded into V1
         {
-            uint Opcode = 0x2E205820; // NOT V0.8B, V1.8B
+            uint Opcode = 0x2E205800; // NOT V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
             AArch64.V(1, new Bits(A));
             SimdFp.Not_V(Op[30], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 must match part 0 of AArch64 register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 must match part 1 of AArch64 register 0
             });
         }
 
-        [Test, Pairwise, Description("NOT <Vd>.<T>, <Vn>.<T>")]
-        public void Not_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0,
-                              [ValueSource("_8B_")] [Random(1)] ulong A1)
+        [Test, Description("NOT <Vd>.<T>, <Vn>.<T>")]
+        public void Not_V_16B([Values(0u)] uint Rd, // destination register index (always 0)
+                              [Values(1u, 0u)] uint Rn, // source register index: 1, or 0 to alias Rd
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // value pre-loaded into V0
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong A) // value loaded into V1
         {
-            uint Opcode = 0x6E205820; // NOT V0.16B, V1.16B
+            uint Opcode = 0x6E205800; // NOT V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // seed V0 with Z as both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // V1 carries A as both E0 and E1
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); // hand both preset registers to SingleOpcode
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z seed for register 0 on the AArch64 side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same A for register 1 on the AArch64 side
             SimdFp.Not_V(Op[30], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -776,37 +919,46 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("RBIT <Vd>.<T>, <Vn>.<T>")]
-        public void Rbit_V_8B([ValueSource("_8B_")] [Random(1)] ulong A)
+        public void Rbit_V_8B([Values(0u)] uint Rd,
+                              [Values(1u, 0u)] uint Rn,
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong Z,
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong A)
         {
-            uint Opcode = 0x2E605820; // RBIT V0.8B, V1.8B
+            uint Opcode = 0x2E605800; // RBIT V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Rbit_V(Op[30], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
-        [Test, Pairwise, Description("RBIT <Vd>.<T>, <Vn>.<T>")]
-        public void Rbit_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0,
-                               [ValueSource("_8B_")] [Random(1)] ulong A1)
+        [Test, Description("RBIT <Vd>.<T>, <Vn>.<T>")]
+        public void Rbit_V_16B([Values(0u)] uint Rd,
+                               [Values(1u, 0u)] uint Rn,
+                               [ValueSource("_8B_")] [Random(RndCnt)] ulong Z,
+                               [ValueSource("_8B_")] [Random(RndCnt)] ulong A)
         {
-            uint Opcode = 0x6E605820; // RBIT V0.16B, V1.16B
+            uint Opcode = 0x6E605800; // RBIT V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Rbit_V(Op[30], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -817,37 +969,46 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("REV16 <Vd>.<T>, <Vn>.<T>")]
-        public void Rev16_V_8B([ValueSource("_8B_")] [Random(1)] ulong A)
+        public void Rev16_V_8B([Values(0u)] uint Rd,
+                               [Values(1u, 0u)] uint Rn,
+                               [ValueSource("_8B_")] [Random(RndCnt)] ulong Z,
+                               [ValueSource("_8B_")] [Random(RndCnt)] ulong A)
         {
-            uint Opcode = 0x0E201820; // REV16 V0.8B, V1.8B
+            uint Opcode = 0x0E201800; // REV16 V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Rev16_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
-        [Test, Pairwise, Description("REV16 <Vd>.<T>, <Vn>.<T>")]
-        public void Rev16_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0,
-                                [ValueSource("_8B_")] [Random(1)] ulong A1)
+        [Test, Description("REV16 <Vd>.<T>, <Vn>.<T>")]
+        public void Rev16_V_16B([Values(0u)] uint Rd,
+                                [Values(1u, 0u)] uint Rn,
+                                [ValueSource("_8B_")] [Random(RndCnt)] ulong Z,
+                                [ValueSource("_8B_")] [Random(RndCnt)] ulong A)
         {
-            uint Opcode = 0x4E201820; // REV16 V0.16B, V1.16B
+            uint Opcode = 0x4E201800; // REV16 V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Rev16_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -858,41 +1019,50 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("REV32 <Vd>.<T>, <Vn>.<T>")]
-        public void Rev32_V_8B_4H([ValueSource("_8B4H_")] [Random(1)] ulong A,
+        public void Rev32_V_8B_4H([Values(0u)] uint Rd,
+                                  [Values(1u, 0u)] uint Rn,
+                                  [ValueSource("_8B4H_")] [Random(RndCnt)] ulong Z,
+                                  [ValueSource("_8B4H_")] [Random(RndCnt)] ulong A,
                                   [Values(0b00u, 0b01u)] uint size) // <8B, 4H>
         {
-            uint Opcode = 0x2E200820; // REV32 V0.8B, V1.8B
+            uint Opcode = 0x2E200800; // REV32 V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Rev32_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
-        [Test, Pairwise, Description("REV32 <Vd>.<T>, <Vn>.<T>")]
-        public void Rev32_V_16B_8H([ValueSource("_8B4H_")] [Random(1)] ulong A0,
-                                   [ValueSource("_8B4H_")] [Random(1)] ulong A1,
+        [Test, Description("REV32 <Vd>.<T>, <Vn>.<T>")]
+        public void Rev32_V_16B_8H([Values(0u)] uint Rd,
+                                   [Values(1u, 0u)] uint Rn,
+                                   [ValueSource("_8B4H_")] [Random(RndCnt)] ulong Z,
+                                   [ValueSource("_8B4H_")] [Random(RndCnt)] ulong A,
                                    [Values(0b00u, 0b01u)] uint size) // <16B, 8H>
         {
-            uint Opcode = 0x6E200820; // REV32 V0.16B, V1.16B
+            uint Opcode = 0x6E200800; // REV32 V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Rev32_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -903,41 +1073,50 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("REV64 <Vd>.<T>, <Vn>.<T>")]
-        public void Rev64_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
+        public void Rev64_V_8B_4H_2S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
                                      [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x0E200820; // REV64 V0.8B, V1.8B
+            uint Opcode = 0x0E200800; // REV64 V0.8B, V0.8B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Rev64_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
-        [Test, Pairwise, Description("REV64 <Vd>.<T>, <Vn>.<T>")]
-        public void Rev64_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                      [ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
+        [Test, Description("REV64 <Vd>.<T>, <Vn>.<T>")]
+        public void Rev64_V_16B_8H_4S([Values(0u)] uint Rd,
+                                      [Values(1u, 0u)] uint Rn,
+                                      [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                      [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
                                       [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
         {
-            uint Opcode = 0x4E200820; // REV64 V0.16B, V1.16B
+            uint Opcode = 0x4E200800; // REV64 V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Rev64_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -948,228 +1127,252 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Description("SQXTN <Vb><d>, <Va><n>")]
-        public void Sqxtn_S_HB_SH_DS([ValueSource("_1H1S1D_")] [Random(1)] ulong A,
+        public void Sqxtn_S_HB_SH_DS([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong A,
                                      [Values(0b00u, 0b01u, 0b10u)] uint size) // <HB, SH, DS>
         {
-            uint Opcode = 0x5E214820; // SQXTN B0, H1
+            uint Opcode = 0x5E214800; // SQXTN B0, H0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong()));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Sqxtn_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
             Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
         }
 
-        [Test, Pairwise, Description("SQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
-        public void Sqxtn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                           [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("SQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
+        public void Sqxtn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd,
+                                           [Values(1u, 0u)] uint Rn,
+                                           [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                           [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A,
                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S>
         {
-            uint Opcode = 0x0E214820; // SQXTN V0.8B, V1.8H
+            uint Opcode = 0x0E214800; // SQXTN V0.8B, V0.8H
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Sqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
             Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
         }
 
-        [Test, Pairwise, Description("SQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
-        public void Sqxtn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("SQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
+        public void Sqxtn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd,
+                                            [Values(1u, 0u)] uint Rn,
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A,
                                             [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S>
         {
-            uint Opcode = 0x4E214820; // SQXTN2 V0.16B, V1.8H
+            uint Opcode = 0x4E214800; // SQXTN2 V0.16B, V0.8H
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            ulong _E0 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> V0 = MakeVectorE0(_E0);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Sqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
             Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
         }
 
         [Test, Description("SQXTUN <Vb><d>, <Va><n>")]
-        public void Sqxtun_S_HB_SH_DS([ValueSource("_1H1S1D_")] [Random(1)] ulong A,
+        public void Sqxtun_S_HB_SH_DS([Values(0u)] uint Rd,
+                                      [Values(1u, 0u)] uint Rn,
+                                      [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong Z,
+                                      [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong A,
                                       [Values(0b00u, 0b01u, 0b10u)] uint size) // <HB, SH, DS>
         {
-            uint Opcode = 0x7E212820; // SQXTUN B0, H1
+            uint Opcode = 0x7E212800; // SQXTUN B0, H0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong()));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Sqxtun_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
             Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
         }
 
-        [Test, Pairwise, Description("SQXTUN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
-        public void Sqxtun_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("SQXTUN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
+        public void Sqxtun_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd,
+                                            [Values(1u, 0u)] uint Rn,
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A,
                                             [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S>
         {
-            uint Opcode = 0x2E212820; // SQXTUN V0.8B, V1.8H
+            uint Opcode = 0x2E212800; // SQXTUN V0.8B, V0.8H
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Sqxtun_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
             Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
         }
 
-        [Test, Pairwise, Description("SQXTUN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
-        public void Sqxtun_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                             [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("SQXTUN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
+        public void Sqxtun_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd,
+                                             [Values(1u, 0u)] uint Rn,
+                                             [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                             [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A,
                                              [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S>
         {
-            uint Opcode = 0x6E212820; // SQXTUN2 V0.16B, V1.8H
+            uint Opcode = 0x6E212800; // SQXTUN2 V0.16B, V0.8H
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            ulong _E0 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> V0 = MakeVectorE0(_E0);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Sqxtun_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
             Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
         }
 
         [Test, Description("UQXTN <Vb><d>, <Va><n>")]
-        public void Uqxtn_S_HB_SH_DS([ValueSource("_1H1S1D_")] [Random(1)] ulong A,
+        public void Uqxtn_S_HB_SH_DS([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong A,
                                      [Values(0b00u, 0b01u, 0b10u)] uint size) // <HB, SH, DS>
         {
-            uint Opcode = 0x7E214820; // UQXTN B0, H1
+            uint Opcode = 0x7E214800; // UQXTN B0, H0
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong()));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             SimdFp.Uqxtn_S(Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
             Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
         }
 
-        [Test, Pairwise, Description("UQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
-        public void Uqxtn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                           [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("UQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
+        public void Uqxtn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd,
+                                           [Values(1u, 0u)] uint Rn,
+                                           [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                           [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A,
                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S>
         {
-            uint Opcode = 0x2E214820; // UQXTN V0.8B, V1.8H
+            uint Opcode = 0x2E214800; // UQXTN V0.8B, V0.8H
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Uqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
             Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
         }
 
-        [Test, Pairwise, Description("UQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
-        public void Uqxtn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
+        [Test, Description("UQXTN{2} <Vd>.<Tb>, <Vn>.<Ta>")]
+        public void Uqxtn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd,
+                                            [Values(1u, 0u)] uint Rn,
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A,
                                             [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S>
         {
-            uint Opcode = 0x6E214820; // UQXTN2 V0.16B, V1.8H
+            uint Opcode = 0x6E214800; // UQXTN2 V0.16B, V0.8H
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            ulong _E0 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> V0 = MakeVectorE0(_E0);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
             SimdFp.Uqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
             Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27]));
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
index 2a0f5ed919..8e2d9a366e 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
@@ -9,46 +9,6 @@ namespace Ryujinx.Tests.Cpu
 {
     public class CpuTestSimdArithmetic : CpuTest
     {
-        [TestCase(0xE228420u,   0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
-        [TestCase(0xE228420u,   0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFFFF00ul, 0x0000000000000000ul)]
-        [TestCase(0xE228420u,   0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFEFEFEFEFEFEFEFEul, 0x0000000000000000ul)]
-        [TestCase(0xE228420u,   0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0x0000000000000000ul)]
-        [TestCase(0x4E228420u,  0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
-        [TestCase(0x4E228420u,  0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFFFF00ul, 0x00000000FFFFFF00ul)]
-        [TestCase(0x4E228420u,  0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFEFEFEFEFEFEFEFEul, 0xFEFEFEFEFEFEFEFEul)]
-        [TestCase(0x4E228420u,  0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)]
-        [TestCase(0xE628420u,   0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
-        [TestCase(0xE628420u,   0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFF0000ul, 0x0000000000000000ul)]
-        [TestCase(0xE628420u,   0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFEFFFEFFFEFFFEul, 0x0000000000000000ul)]
-        [TestCase(0xE628420u,   0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0x0000000000000000ul)]
-        [TestCase(0x4E628420u,  0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
-        [TestCase(0x4E628420u,  0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFF0000ul, 0x00000000FFFF0000ul)]
-        [TestCase(0x4E628420u,  0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFEFFFEFFFEFFFEul, 0xFFFEFFFEFFFEFFFEul)]
-        [TestCase(0x4E628420u,  0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)]
-        [TestCase(0xEA28420u,   0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
-        [TestCase(0xEA28420u,   0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x0000000000000000ul, 0x0000000000000000ul)]
-        [TestCase(0xEA28420u,   0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFEFFFFFFFEul, 0x0000000000000000ul)]
-        [TestCase(0xEA28420u,   0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0x0000000000000000ul)]
-        [TestCase(0x4EA28420u,  0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
-        [TestCase(0x4EA28420u,  0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x0000000000000000ul, 0x0000000000000000ul)]
-        [TestCase(0x4EA28420u,  0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFEFFFFFFFEul, 0xFFFFFFFEFFFFFFFEul)]
-        [TestCase(0x4EA28420u,  0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)]
-        [TestCase(0x4EE28420u,  0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
-        [TestCase(0x4EE28420u,  0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x0000000100000000ul, 0x0000000100000000ul)]
-        [TestCase(0x4EE28420u,  0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFEul, 0xFFFFFFFFFFFFFFFEul)]
-        [TestCase(0x4EE28420u,  0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)]
-        public void Add_V(uint Opcode, ulong A0, ulong A1, ulong B0, ulong B1, ulong Result0, ulong Result1)
-        {
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
-            Assert.Multiple(() =>
-            {
-                Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0));
-                Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0));
-            });
-        }
-
         [TestCase(0x1E224820u, 0x0000000000000000ul, 0x0000000080000000ul, 0x0000000000000000ul)]
         [TestCase(0x1E224820u, 0x0000000080000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
         [TestCase(0x1E224820u, 0x0000000080000000ul, 0x0000000080000000ul, 0x0000000080000000ul)]
@@ -195,6 +155,7 @@ namespace Ryujinx.Tests.Cpu
                 V0: Sse.SetAllVector128(B));
 
             float Result = (float)(2 - ((double)A * (double)B));
+
             Assert.Multiple(() =>
             {
                 Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(Result));
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs
deleted file mode 100644
index 055e08689c..0000000000
--- a/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs
+++ /dev/null
@@ -1,136 +0,0 @@
-using ChocolArm64.State;
-
-using NUnit.Framework;
-
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
-
-namespace Ryujinx.Tests.Cpu
-{
-    public class CpuTestSimdMove : CpuTest
-    {
-        [Test, Description("TRN1 V0.4S, V1.4S, V2.4S")]
-        public void Trn1_V_4S([Random(2)] uint A0, [Random(2)] uint A1, [Random(2)] uint A2, [Random(2)] uint A3,
-                              [Random(2)] uint B0, [Random(2)] uint B1, [Random(2)] uint B2, [Random(2)] uint B3)
-        {
-            uint Opcode = 0x4E822820;
-            Vector128<float> V1 = Sse.StaticCast<uint, float>(Sse2.SetVector128(A3, A2, A1, A0));
-            Vector128<float> V2 = Sse.StaticCast<uint, float>(Sse2.SetVector128(B3, B2, B1, B0));
-
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)0), Is.EqualTo(A0));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)1), Is.EqualTo(B0));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)2), Is.EqualTo(A2));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)3), Is.EqualTo(B2));
-            });
-        }
-
-        [Test, Description("TRN1 V0.8B, V1.8B, V2.8B")]
-        public void Trn1_V_8B([Random(2)] byte A0, [Random(1)] byte A1, [Random(2)] byte A2, [Random(1)] byte A3,
-                              [Random(2)] byte A4, [Random(1)] byte A5, [Random(2)] byte A6, [Random(1)] byte A7,
-                              [Random(2)] byte B0, [Random(1)] byte B1, [Random(2)] byte B2, [Random(1)] byte B3,
-                              [Random(2)] byte B4, [Random(1)] byte B5, [Random(2)] byte B6, [Random(1)] byte B7)
-        {
-            uint Opcode = 0x0E022820;
-            Vector128<float> V1 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, A7, A6, A5, A4, A3, A2, A1, A0));
-            Vector128<float> V2 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, B7, B6, B5, B4, B3, B2, B1, B0));
-
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)0), Is.EqualTo(A0));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)1), Is.EqualTo(B0));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)2), Is.EqualTo(A2));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)3), Is.EqualTo(B2));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)4), Is.EqualTo(A4));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)5), Is.EqualTo(B4));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)6), Is.EqualTo(A6));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)7), Is.EqualTo(B6));
-            });
-        }
-
-        [Test, Description("TRN2 V0.4S, V1.4S, V2.4S")]
-        public void Trn2_V_4S([Random(2)] uint A0, [Random(2)] uint A1, [Random(2)] uint A2, [Random(2)] uint A3,
-                              [Random(2)] uint B0, [Random(2)] uint B1, [Random(2)] uint B2, [Random(2)] uint B3)
-        {
-            uint Opcode = 0x4E826820;
-            Vector128<float> V1 = Sse.StaticCast<uint, float>(Sse2.SetVector128(A3, A2, A1, A0));
-            Vector128<float> V2 = Sse.StaticCast<uint, float>(Sse2.SetVector128(B3, B2, B1, B0));
-
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)0), Is.EqualTo(A1));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)1), Is.EqualTo(B1));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)2), Is.EqualTo(A3));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)3), Is.EqualTo(B3));
-            });
-        }
-
-        [Test, Description("TRN2 V0.8B, V1.8B, V2.8B")]
-        public void Trn2_V_8B([Random(1)] byte A0, [Random(2)] byte A1, [Random(1)] byte A2, [Random(2)] byte A3,
-                              [Random(1)] byte A4, [Random(2)] byte A5, [Random(1)] byte A6, [Random(2)] byte A7,
-                              [Random(1)] byte B0, [Random(2)] byte B1, [Random(1)] byte B2, [Random(2)] byte B3,
-                              [Random(1)] byte B4, [Random(2)] byte B5, [Random(1)] byte B6, [Random(2)] byte B7)
-        {
-            uint Opcode = 0x0E026820;
-            Vector128<float> V1 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, A7, A6, A5, A4, A3, A2, A1, A0));
-            Vector128<float> V2 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, B7, B6, B5, B4, B3, B2, B1, B0));
-
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)0), Is.EqualTo(A1));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)1), Is.EqualTo(B1));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)2), Is.EqualTo(A3));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)3), Is.EqualTo(B3));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)4), Is.EqualTo(A5));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)5), Is.EqualTo(B5));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)6), Is.EqualTo(A7));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)7), Is.EqualTo(B7));
-            });
-        }
-
-        [TestCase(0u, 0u, 0x2313221221112010ul, 0x0000000000000000ul)]
-        [TestCase(1u, 0u, 0x2313221221112010ul, 0x2717261625152414ul)]
-        [TestCase(0u, 1u, 0x2322131221201110ul, 0x0000000000000000ul)]
-        [TestCase(1u, 1u, 0x2322131221201110ul, 0x2726171625241514ul)]
-        [TestCase(0u, 2u, 0x2322212013121110ul, 0x0000000000000000ul)]
-        [TestCase(1u, 2u, 0x2322212013121110ul, 0x2726252417161514ul)]
-        [TestCase(1u, 3u, 0x1716151413121110ul, 0x2726252423222120ul)]
-        public void Zip1_V(uint Q, uint size, ulong Result_0, ulong Result_1)
-        {
-            // ZIP1 V0.<T>, V1.<T>, V2.<T>
-            uint Opcode = 0x0E023820 | (Q << 30) | (size << 22);
-            Vector128<float> V1 = MakeVectorE0E1(0x1716151413121110, 0x1F1E1D1C1B1A1918);
-            Vector128<float> V2 = MakeVectorE0E1(0x2726252423222120, 0x2F2E2D2C2B2A2928);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
-            Assert.AreEqual(Result_0, GetVectorE0(ThreadState.V0));
-            Assert.AreEqual(Result_1, GetVectorE1(ThreadState.V0));
-        }
-
-        [TestCase(0u, 0u, 0x2717261625152414ul, 0x0000000000000000ul)]
-        [TestCase(1u, 0u, 0x2B1B2A1A29192818ul, 0x2F1F2E1E2D1D2C1Cul)]
-        [TestCase(0u, 1u, 0x2726171625241514ul, 0x0000000000000000ul)]
-        [TestCase(1u, 1u, 0x2B2A1B1A29281918ul, 0x2F2E1F1E2D2C1D1Cul)]
-        [TestCase(0u, 2u, 0x2726252417161514ul, 0x0000000000000000ul)]
-        [TestCase(1u, 2u, 0x2B2A29281B1A1918ul, 0x2F2E2D2C1F1E1D1Cul)]
-        [TestCase(1u, 3u, 0x1F1E1D1C1B1A1918ul, 0x2F2E2D2C2B2A2928ul)]
-        public void Zip2_V(uint Q, uint size, ulong Result_0, ulong Result_1)
-        {
-            // ZIP2 V0.<T>, V1.<T>, V2.<T>
-            uint Opcode = 0x0E027820 | (Q << 30) | (size << 22);
-            Vector128<float> V1 = MakeVectorE0E1(0x1716151413121110, 0x1F1E1D1C1B1A1918);
-            Vector128<float> V2 = MakeVectorE0E1(0x2726252423222120, 0x2F2E2D2C2B2A2928);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
-            Assert.AreEqual(Result_0, GetVectorE0(ThreadState.V0));
-            Assert.AreEqual(Result_1, GetVectorE1(ThreadState.V0));
-        }
-    }
-}
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
index 5e14f55d36..51db857c31 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
@@ -60,73 +60,90 @@ namespace Ryujinx.Tests.Cpu
         }
 #endregion
 
-        [Test, Description("ADD <V><d>, <V><n>, <V><m>")]
-        public void Add_S_D([ValueSource("_1D_")] [Random(1)] ulong A,
-                            [ValueSource("_1D_")] [Random(1)] ulong B)
+        private const int RndCnt = 4; // count argument given to each [Random(RndCnt)] parameter attribute
+
+        [Test, Pairwise, Description("ADD <V><d>, <V><n>, <V><m>")] // Pairwise: NUnit covers every pair of parameter values instead of the full cross product
+        public void Add_S_D([Values(0u)] uint Rd, // destination register index; always 0
+                            [Values(1u, 0u)] uint Rn, // first source register index; 0 makes it the same register as Rd
+                            [Values(2u, 0u)] uint Rm, // second source register index; 0 makes it the same register as Rd
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // initial contents of V0 (see MakeVectorE0E1(Z, Z) below)
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong A, // input used to build V1
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong B) // input used to build V2
         {
-            uint Opcode = 0x5EE28420; // ADD D0, D1, D2
+            uint Opcode = 0x5EE08400; // ADD D0, D0, D0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
             Vector128<float> V1 = MakeVectorE0(A);
             Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 the same Z in parts 0 and 1 of register 0
             AArch64.V(1, new Bits(A));
             AArch64.V(2, new Bits(B));
             SimdFp.Add_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
-            });
-        }
-
-        [Test, Description("ADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Add_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                   [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
-                                   [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
-        {
-            uint Opcode = 0x0E228420; // ADD V0.8B, V1.8B, V2.8B
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Add_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 vs. AArch64 part 0 of register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 of V0 vs. AArch64 part 1 of register 0
             });
         }
 
         [Test, Pairwise, Description("ADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Add_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                       [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
-                                       [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0,
-                                       [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1,
-                                       [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        public void Add_V_8B_4H_2S([Values(0u)] uint Rd, // destination register index; always 0
+                                   [Values(1u, 0u)] uint Rn, // first source register index; 0 makes it the same register as Rd
+                                   [Values(2u, 0u)] uint Rm, // second source register index; 0 makes it the same register as Rd
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, // initial contents of V0 (see MakeVectorE0E1(Z, Z) below)
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, // input used to build V1
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, // input used to build V2
+                                   [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x4E228420; // ADD V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x0E208400; // ADD V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); // run the single opcode with V0, V1 and V2 preloaded
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 the same Z in parts 0 and 1 of register 0
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            SimdFp.Add_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments are opcode bit 30 and fields 23:22, 20:16, 9:5, 4:0
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 vs. AArch64 part 0 of register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 of V0 vs. AArch64 part 1 of register 0
+            });
+        }
+
+        [Test, Pairwise, Description("ADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Add_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                       [Values(1u, 0u)] uint Rn,
+                                       [Values(2u, 0u)] uint Rm,
+                                       [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                       [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                       [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B,
+                                       [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x4E208400; // ADD V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Add_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -137,108 +154,89 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise, Description("ADDHN{2} <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta>")]
-        public void Addhn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                           [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
-                                           [ValueSource("_4H2S1D_")] [Random(1)] ulong B0,
-                                           [ValueSource("_4H2S1D_")] [Random(1)] ulong B1,
+        public void Addhn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, // destination register index; always 0
+                                           [Values(1u, 0u)] uint Rn, // first source register index; 0 makes it the same register as Rd
+                                           [Values(2u, 0u)] uint Rm, // second source register index; 0 makes it the same register as Rd
+                                           [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, // initial contents of V0 (passed twice to MakeVectorE0E1)
+                                           [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, // contents of V1 (passed twice to MakeVectorE0E1)
+                                           [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, // contents of V2 (passed twice to MakeVectorE0E1)
                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S>
         {
-            uint Opcode = 0x0E224020; // ADDHN V0.8B, V1.8H, V2.8H
+            uint Opcode = 0x0E204000; // ADDHN V0.8B, V0.8H, V0.8H
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // A is passed for both arguments
+            Vector128<float> V2 = MakeVectorE0E1(B, B); // B is passed for both arguments
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // register 0, parts 0 and 1 <- Z
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // register 1, parts 0 and 1 <- A
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); // register 2, parts 0 and 1 <- B
             SimdFp.Addhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 vs. AArch64 part 0 of register 0
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 of V0 vs. AArch64 part 1 of register 0
             });
         }
 
         [Test, Pairwise, Description("ADDHN{2} <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta>")]
-        public void Addhn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong B0,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong B1,
+        public void Addhn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, // destination register index; always 0
+                                            [Values(1u, 0u)] uint Rn, // first source register index; 0 makes it the same register as Rd
+                                            [Values(2u, 0u)] uint Rm, // second source register index; 0 makes it the same register as Rd
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, // initial contents of V0 (passed twice to MakeVectorE0E1)
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, // contents of V1 (passed twice to MakeVectorE0E1)
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, // contents of V2 (passed twice to MakeVectorE0E1)
                                             [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S>
         {
-            uint Opcode = 0x4E224020; // ADDHN2 V0.16B, V1.8H, V2.8H
+            uint Opcode = 0x4E204000; // ADDHN2 V0.16B, V0.8H, V0.8H
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            ulong _E0 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> V0 = MakeVectorE0(_E0);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // A is passed for both arguments
+            Vector128<float> V2 = MakeVectorE0E1(B, B); // B is passed for both arguments
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // register 0, parts 0 and 1 <- Z
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // register 1, parts 0 and 1 <- A
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); // register 2, parts 0 and 1 <- B
             SimdFp.Addhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 vs. AArch64 part 0 of register 0
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
-        [Test, Description("ADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Addp_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
-                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
-        {
-            uint Opcode = 0x0E22BC20; // ADDP V0.8B, V1.8B, V2.8B
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Addp_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
-            });
-        }
-
         [Test, Pairwise, Description("ADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Addp_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1,
-                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        public void Addp_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [Values(2u, 0u)] uint Rm,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x4E22BC20; // ADDP V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x0E20BC00; // ADDP V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Addp_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -248,46 +246,57 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("AND <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void And_V_8B([ValueSource("_8B_")] [Random(1)] ulong A,
-                             [ValueSource("_8B_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("ADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Addp_V_16B_8H_4S_2D([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                                        [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                                        [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // initial V1 contents, used for both E0 and E1
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, // initial V2 contents, used for both E0 and E1
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x0E221C20; // AND V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x4E20BC00; // ADDP V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
+            Opcode |= ((size & 3) << 22); // size -> bits [23:22]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.And_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Addp_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Pairwise, Description("AND <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void And_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0,
-                              [ValueSource("_8B_")] [Random(1)] ulong A1,
-                              [ValueSource("_8B_")] [Random(1)] ulong B0,
-                              [ValueSource("_8B_")] [Random(1)] ulong B1)
+        public void And_V_8B([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                             [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                             [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial V1 contents (E0 only)
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial V2 contents (E0 only)
         {
-            uint Opcode = 0x4E221C20; // AND V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x0E201C00; // AND V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.And_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -297,46 +306,55 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("BIC <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Bic_V_8B([ValueSource("_8B_")] [Random(1)] ulong A,
-                             [ValueSource("_8B_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("AND <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void And_V_16B([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                              [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                              [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial V1 contents, used for both E0 and E1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial V2 contents, used for both E0 and E1
         {
-            uint Opcode = 0x0E621C20; // BIC V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x4E201C00; // AND V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Bic_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.And_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Pairwise, Description("BIC <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Bic_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0,
-                              [ValueSource("_8B_")] [Random(1)] ulong A1,
-                              [ValueSource("_8B_")] [Random(1)] ulong B0,
-                              [ValueSource("_8B_")] [Random(1)] ulong B1)
+        public void Bic_V_8B([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                             [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                             [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial V1 contents (E0 only)
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial V2 contents (E0 only)
         {
-            uint Opcode = 0x4E621C20; // BIC V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x0E601C00; // BIC V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Bic_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -346,53 +364,55 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("BIF <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Bif_V_8B([ValueSource("_8B_")] [Random(1)] ulong _Z,
-                             [ValueSource("_8B_")] [Random(1)] ulong A,
-                             [ValueSource("_8B_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("BIC <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Bic_V_16B([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                              [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                              [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial V1 contents, used for both E0 and E1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial V2 contents, used for both E0 and E1
         {
-            uint Opcode = 0x2EE21C20; // BIF V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x4E601C00; // BIC V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z));
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Bif_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Bic_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Pairwise, Description("BIF <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Bif_V_16B([ValueSource("_8B_")] [Random(1)] ulong _Z0,
-                              [ValueSource("_8B_")] [Random(1)] ulong _Z1,
-                              [ValueSource("_8B_")] [Random(1)] ulong A0,
-                              [ValueSource("_8B_")] [Random(1)] ulong A1,
-                              [ValueSource("_8B_")] [Random(1)] ulong B0,
-                              [ValueSource("_8B_")] [Random(1)] ulong B1)
+        public void Bif_V_8B([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                             [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                             [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial V1 contents (E0 only)
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial V2 contents (E0 only)
         {
-            uint Opcode = 0x6EE21C20; // BIF V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x2EE01C00; // BIF V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z0));
-            AArch64.Vpart(0, 1, new Bits(_Z1));
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Bif_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -402,53 +422,55 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("BIT <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Bit_V_8B([ValueSource("_8B_")] [Random(1)] ulong _Z,
-                             [ValueSource("_8B_")] [Random(1)] ulong A,
-                             [ValueSource("_8B_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("BIF <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Bif_V_16B([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                              [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                              [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial V1 contents, used for both E0 and E1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial V2 contents, used for both E0 and E1
         {
-            uint Opcode = 0x2EA21C20; // BIT V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x6EE01C00; // BIF V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z));
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Bit_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Bif_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Pairwise, Description("BIT <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Bit_V_16B([ValueSource("_8B_")] [Random(1)] ulong _Z0,
-                              [ValueSource("_8B_")] [Random(1)] ulong _Z1,
-                              [ValueSource("_8B_")] [Random(1)] ulong A0,
-                              [ValueSource("_8B_")] [Random(1)] ulong A1,
-                              [ValueSource("_8B_")] [Random(1)] ulong B0,
-                              [ValueSource("_8B_")] [Random(1)] ulong B1)
+        public void Bit_V_8B([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                             [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                             [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial V1 contents (E0 only)
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial V2 contents (E0 only)
         {
-            uint Opcode = 0x6EA21C20; // BIT V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x2EA01C00; // BIT V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z0));
-            AArch64.Vpart(0, 1, new Bits(_Z1));
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Bit_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -458,53 +480,55 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("BSL <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Bsl_V_8B([ValueSource("_8B_")] [Random(1)] ulong _Z,
-                             [ValueSource("_8B_")] [Random(1)] ulong A,
-                             [ValueSource("_8B_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("BIT <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Bit_V_16B([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                              [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                              [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial V1 contents, used for both E0 and E1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial V2 contents, used for both E0 and E1
         {
-            uint Opcode = 0x2E621C20; // BSL V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x6EA01C00; // BIT V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z));
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Bsl_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Bit_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Pairwise, Description("BSL <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Bsl_V_16B([ValueSource("_8B_")] [Random(1)] ulong _Z0,
-                              [ValueSource("_8B_")] [Random(1)] ulong _Z1,
-                              [ValueSource("_8B_")] [Random(1)] ulong A0,
-                              [ValueSource("_8B_")] [Random(1)] ulong A1,
-                              [ValueSource("_8B_")] [Random(1)] ulong B0,
-                              [ValueSource("_8B_")] [Random(1)] ulong B1)
+        public void Bsl_V_8B([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                             [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                             [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial V1 contents (E0 only)
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial V2 contents (E0 only)
         {
-            uint Opcode = 0x6E621C20; // BSL V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x2E601C00; // BSL V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z0));
-            AArch64.Vpart(0, 1, new Bits(_Z1));
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Bsl_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -514,73 +538,86 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("CMEQ <V><d>, <V><n>, <V><m>")]
-        public void Cmeq_S_D([ValueSource("_1D_")] [Random(1)] ulong A,
-                             [ValueSource("_1D_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("BSL <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Bsl_V_16B([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                              [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                              [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial V1 contents, used for both E0 and E1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial V2 contents, used for both E0 and E1
         {
-            uint Opcode = 0x7EE28C20; // CMEQ D0, D1, D2
+            uint Opcode = 0x6E601C00; // BSL V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Bsl_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("CMEQ <V><d>, <V><n>, <V><m>")]
+        public void Cmeq_S_D([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                             [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                             [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong A, // initial V1 contents (E0 only)
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong B) // initial V2 contents (E0 only)
+        {
+            uint Opcode = 0x7EE08C00; // CMEQ D0, D0, D0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
             AArch64.V(1, new Bits(A));
             AArch64.V(2, new Bits(B));
             SimdFp.Cmeq_Reg_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
-            });
-        }
-
-        [Test, Description("CMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmeq_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
-                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
-        {
-            uint Opcode = 0x2E228C20; // CMEQ V0.8B, V1.8B, V2.8B
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Cmeq_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Pairwise, Description("CMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmeq_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1,
-                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        public void Cmeq_V_8B_4H_2S([Values(0u)] uint Rd, // destination register index; only 0 is tested
+                                    [Values(1u, 0u)] uint Rn, // first source register index; 0 aliases the destination
+                                    [Values(2u, 0u)] uint Rm, // second source register index; 0 aliases the destination
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, // initial V0 contents, used for both E0 and E1
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, // initial V1 contents (E0 only)
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, // initial V2 contents (E0 only)
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x6E228C20; // CMEQ V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x2E208C00; // CMEQ V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits [20:16], Rn -> bits [9:5], Rd -> bits [4:0]
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // give AArch64 register 0 the same Z in both parts as V0 above
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Cmeq_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -590,73 +627,88 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("CMGE <V><d>, <V><n>, <V><m>")]
-        public void Cmge_S_D([ValueSource("_1D_")] [Random(1)] ulong A,
-                             [ValueSource("_1D_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("CMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Runs one CMEQ (vector) opcode through SingleOpcode and compares V0 with AArch64.Vpart after SimdFp.Cmeq_Reg_V.
+        public void Cmeq_V_16B_8H_4S_2D([Values(0u)] uint Rd, // destination register number; only 0 is exercised
+                                        [Values(1u, 0u)] uint Rn, // 0 makes Rn the same register as Rd
+                                        [Values(2u, 0u)] uint Rm, // 0 makes Rm the same register as Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // initial contents of V0
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // contents of V1
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, // contents of V2
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x5EE23C20; // CMGE D0, D1, D2
+            uint Opcode = 0x6E208C00; // CMEQ V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // insert the 5-bit register numbers: Rm at bits 20:16, Rn at 9:5, Rd at 4:0
+            Opcode |= ((size & 3) << 22); // 2-bit size field at bits 23:22
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // likewise A
+            Vector128<float> V2 = MakeVectorE0E1(B, B); // likewise B
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z/A/B handed to AArch64 for parts 0 and 1 (presumably the reference-model registers - confirm)
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Cmeq_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // slices line up with the shifts above (23:22 size, 20:16 Rm, 9:5 Rn, 4:0 Rd); bit 30 is passed first
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 after execution vs. Vpart(64, 0, 0)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 of V0 after execution vs. Vpart(64, 0, 1)
+            });
+        }
+
+        [Test, Pairwise, Description("CMGE <V><d>, <V><n>, <V><m>")] // Runs one CMGE (scalar) opcode through SingleOpcode and compares V0 with AArch64.Vpart after SimdFp.Cmge_Reg_S.
+        public void Cmge_S_D([Values(0u)] uint Rd, // destination register number; only 0 is exercised
+                             [Values(1u, 0u)] uint Rn, // 0 makes Rn the same register as Rd
+                             [Values(2u, 0u)] uint Rm, // 0 makes Rm the same register as Rd
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // initial contents of V0
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong A, // contents of V1
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong B) // contents of V2
+        {
+            uint Opcode = 0x5EE03C00; // CMGE D0, D0, D0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // insert the 5-bit register numbers: Rm at bits 20:16, Rn at 9:5, Rd at 4:0
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
             Vector128<float> V1 = MakeVectorE0(A);
             Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z handed to AArch64 for register 0, parts 0 and 1 (presumably the reference-model register - confirm)
             AArch64.V(1, new Bits(A));
             AArch64.V(2, new Bits(B));
             SimdFp.Cmge_Reg_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
-            });
-        }
-
-        [Test, Description("CMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmge_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
-                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
-        {
-            uint Opcode = 0x0E223C20; // CMGE V0.8B, V1.8B, V2.8B
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Cmge_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 after execution vs. Vpart(64, 0, 0)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 of V0 after execution vs. Vpart(64, 0, 1)
             });
         }
 
         [Test, Pairwise, Description("CMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmge_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1,
-                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        public void Cmge_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [Values(2u, 0u)] uint Rm,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x4E223C20; // CMGE V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x0E203C00; // CMGE V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Cmge_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -666,73 +718,88 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("CMGT <V><d>, <V><n>, <V><m>")]
-        public void Cmgt_S_D([ValueSource("_1D_")] [Random(1)] ulong A,
-                             [ValueSource("_1D_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("CMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Runs one CMGE (vector) opcode through SingleOpcode and compares V0 with AArch64.Vpart after SimdFp.Cmge_Reg_V.
+        public void Cmge_V_16B_8H_4S_2D([Values(0u)] uint Rd, // destination register number; only 0 is exercised
+                                        [Values(1u, 0u)] uint Rn, // 0 makes Rn the same register as Rd
+                                        [Values(2u, 0u)] uint Rm, // 0 makes Rm the same register as Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // initial contents of V0
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // contents of V1
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, // contents of V2
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x5EE23420; // CMGT D0, D1, D2
+            uint Opcode = 0x4E203C00; // CMGE V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // insert the 5-bit register numbers: Rm at bits 20:16, Rn at 9:5, Rd at 4:0
+            Opcode |= ((size & 3) << 22); // 2-bit size field at bits 23:22
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // likewise A
+            Vector128<float> V2 = MakeVectorE0E1(B, B); // likewise B
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z/A/B handed to AArch64 for parts 0 and 1 (presumably the reference-model registers - confirm)
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Cmge_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // slices line up with the shifts above (23:22 size, 20:16 Rm, 9:5 Rn, 4:0 Rd); bit 30 is passed first
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 after execution vs. Vpart(64, 0, 0)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 of V0 after execution vs. Vpart(64, 0, 1)
+            });
+        }
+
+        [Test, Pairwise, Description("CMGT <V><d>, <V><n>, <V><m>")] // Runs one CMGT (scalar) opcode through SingleOpcode and compares V0 with AArch64.Vpart after SimdFp.Cmgt_Reg_S.
+        public void Cmgt_S_D([Values(0u)] uint Rd, // destination register number; only 0 is exercised
+                             [Values(1u, 0u)] uint Rn, // 0 makes Rn the same register as Rd
+                             [Values(2u, 0u)] uint Rm, // 0 makes Rm the same register as Rd
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // initial contents of V0
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong A, // contents of V1
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong B) // contents of V2
+        {
+            uint Opcode = 0x5EE03400; // CMGT D0, D0, D0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // insert the 5-bit register numbers: Rm at bits 20:16, Rn at 9:5, Rd at 4:0
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
             Vector128<float> V1 = MakeVectorE0(A);
             Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z handed to AArch64 for register 0, parts 0 and 1 (presumably the reference-model register - confirm)
             AArch64.V(1, new Bits(A));
             AArch64.V(2, new Bits(B));
             SimdFp.Cmgt_Reg_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
-            });
-        }
-
-        [Test, Description("CMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmgt_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
-                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
-        {
-            uint Opcode = 0x0E223420; // CMGT V0.8B, V1.8B, V2.8B
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Cmgt_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 after execution vs. Vpart(64, 0, 0)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 of V0 after execution vs. Vpart(64, 0, 1)
             });
         }
 
         [Test, Pairwise, Description("CMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmgt_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1,
-                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        public void Cmgt_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [Values(2u, 0u)] uint Rm,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x4E223420; // CMGT V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x0E203400; // CMGT V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Cmgt_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -742,73 +809,88 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("CMHI <V><d>, <V><n>, <V><m>")]
-        public void Cmhi_S_D([ValueSource("_1D_")] [Random(1)] ulong A,
-                             [ValueSource("_1D_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("CMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Runs one CMGT (vector) opcode through SingleOpcode and compares V0 with AArch64.Vpart after SimdFp.Cmgt_Reg_V.
+        public void Cmgt_V_16B_8H_4S_2D([Values(0u)] uint Rd, // destination register number; only 0 is exercised
+                                        [Values(1u, 0u)] uint Rn, // 0 makes Rn the same register as Rd
+                                        [Values(2u, 0u)] uint Rm, // 0 makes Rm the same register as Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // initial contents of V0
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // contents of V1
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, // contents of V2
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x7EE23420; // CMHI D0, D1, D2
+            uint Opcode = 0x4E203400; // CMGT V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // insert the 5-bit register numbers: Rm at bits 20:16, Rn at 9:5, Rd at 4:0
+            Opcode |= ((size & 3) << 22); // 2-bit size field at bits 23:22
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // likewise A
+            Vector128<float> V2 = MakeVectorE0E1(B, B); // likewise B
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z/A/B handed to AArch64 for parts 0 and 1 (presumably the reference-model registers - confirm)
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Cmgt_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // slices line up with the shifts above (23:22 size, 20:16 Rm, 9:5 Rn, 4:0 Rd); bit 30 is passed first
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 after execution vs. Vpart(64, 0, 0)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 of V0 after execution vs. Vpart(64, 0, 1)
+            });
+        }
+
+        [Test, Pairwise, Description("CMHI <V><d>, <V><n>, <V><m>")] // Runs one CMHI (scalar) opcode through SingleOpcode and compares V0 with AArch64.Vpart after SimdFp.Cmhi_S.
+        public void Cmhi_S_D([Values(0u)] uint Rd, // destination register number; only 0 is exercised
+                             [Values(1u, 0u)] uint Rn, // 0 makes Rn the same register as Rd
+                             [Values(2u, 0u)] uint Rm, // 0 makes Rm the same register as Rd
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // initial contents of V0
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong A, // contents of V1
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong B) // contents of V2
+        {
+            uint Opcode = 0x7EE03400; // CMHI D0, D0, D0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // insert the 5-bit register numbers: Rm at bits 20:16, Rn at 9:5, Rd at 4:0
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
             Vector128<float> V1 = MakeVectorE0(A);
             Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z handed to AArch64 for register 0, parts 0 and 1 (presumably the reference-model register - confirm)
             AArch64.V(1, new Bits(A));
             AArch64.V(2, new Bits(B));
             SimdFp.Cmhi_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
-            });
-        }
-
-        [Test, Description("CMHI <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmhi_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
-                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
-        {
-            uint Opcode = 0x2E223420; // CMHI V0.8B, V1.8B, V2.8B
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Cmhi_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 after execution vs. Vpart(64, 0, 0)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 of V0 after execution vs. Vpart(64, 0, 1)
             });
         }
 
         [Test, Pairwise, Description("CMHI <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmhi_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1,
-                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        public void Cmhi_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [Values(2u, 0u)] uint Rm,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x6E223420; // CMHI V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x2E203400; // CMHI V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Cmhi_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -818,73 +900,88 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("CMHS <V><d>, <V><n>, <V><m>")]
-        public void Cmhs_S_D([ValueSource("_1D_")] [Random(1)] ulong A,
-                             [ValueSource("_1D_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("CMHI <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Runs one CMHI (vector) opcode through SingleOpcode and compares V0 with AArch64.Vpart after SimdFp.Cmhi_V.
+        public void Cmhi_V_16B_8H_4S_2D([Values(0u)] uint Rd, // destination register number; only 0 is exercised
+                                        [Values(1u, 0u)] uint Rn, // 0 makes Rn the same register as Rd
+                                        [Values(2u, 0u)] uint Rm, // 0 makes Rm the same register as Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // initial contents of V0
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // contents of V1
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, // contents of V2
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x7EE23C20; // CMHS D0, D1, D2
+            uint Opcode = 0x6E203400; // CMHI V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // insert the 5-bit register numbers: Rm at bits 20:16, Rn at 9:5, Rd at 4:0
+            Opcode |= ((size & 3) << 22); // 2-bit size field at bits 23:22
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // likewise A
+            Vector128<float> V2 = MakeVectorE0E1(B, B); // likewise B
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z/A/B handed to AArch64 for parts 0 and 1 (presumably the reference-model registers - confirm)
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Cmhi_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // slices line up with the shifts above (23:22 size, 20:16 Rm, 9:5 Rn, 4:0 Rd); bit 30 is passed first
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 after execution vs. Vpart(64, 0, 0)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 of V0 after execution vs. Vpart(64, 0, 1)
+            });
+        }
+
+        [Test, Pairwise, Description("CMHS <V><d>, <V><n>, <V><m>")] // Runs one CMHS (scalar) opcode through SingleOpcode and compares V0 with AArch64.Vpart after SimdFp.Cmhs_S.
+        public void Cmhs_S_D([Values(0u)] uint Rd, // destination register number; only 0 is exercised
+                             [Values(1u, 0u)] uint Rn, // 0 makes Rn the same register as Rd
+                             [Values(2u, 0u)] uint Rm, // 0 makes Rm the same register as Rd
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // initial contents of V0
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong A, // contents of V1
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong B) // contents of V2
+        {
+            uint Opcode = 0x7EE03C00; // CMHS D0, D0, D0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // insert the 5-bit register numbers: Rm at bits 20:16, Rn at 9:5, Rd at 4:0
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z is passed for both arguments
             Vector128<float> V1 = MakeVectorE0(A);
             Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same Z handed to AArch64 for register 0, parts 0 and 1 (presumably the reference-model register - confirm)
             AArch64.V(1, new Bits(A));
             AArch64.V(2, new Bits(B));
             SimdFp.Cmhs_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
-            });
-        }
-
-        [Test, Description("CMHS <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmhs_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
-                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
-        {
-            uint Opcode = 0x2E223C20; // CMHS V0.8B, V1.8B, V2.8B
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Cmhs_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // E0 of V0 after execution vs. Vpart(64, 0, 0)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // E1 of V0 after execution vs. Vpart(64, 0, 1)
             });
         }
 
         [Test, Pairwise, Description("CMHS <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmhs_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0,
-                                        [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1,
-                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        public void Cmhs_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [Values(2u, 0u)] uint Rm,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x6E223C20; // CMHS V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x2E203C00; // CMHS V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Cmhs_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -894,73 +991,88 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("CMTST <V><d>, <V><n>, <V><m>")]
-        public void Cmtst_S_D([ValueSource("_1D_")] [Random(1)] ulong A,
-                              [ValueSource("_1D_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("CMHS <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Pairwise (NUnit): covers every pair of parameter values rather than the full cross product
+        public void Cmhs_V_16B_8H_4S_2D([Values(0u)] uint Rd, // runs one CMHS and compares V0 with the AArch64 tester; Rd is pinned to 0, so V0 is the destination
+                                        [Values(1u, 0u)] uint Rn, // 0 selects Rn == Rd
+                                        [Values(2u, 0u)] uint Rm, // 0 selects Rm == Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // initial value loaded into V0
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // initial value loaded into V1
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, // initial value loaded into V2
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x5EE28C20; // CMTST D0, D1, D2
+            uint Opcode = 0x6E203C00; // CMHS V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // patch the register fields: Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // patch the size field: bits 23:22
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // V0 is seeded with Z for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed on the AArch64 tester side, which yields the expected values
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Cmhs_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments are sliced from the encoded opcode
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // compared with part 0 of the tester's register 0 (argument order presumed: width, reg, part)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // likewise for part 1
+            });
+        }
+
+        [Test, Pairwise, Description("CMTST <V><d>, <V><n>, <V><m>")] // Pairwise (NUnit): covers every pair of parameter values rather than the full cross product
+        public void Cmtst_S_D([Values(0u)] uint Rd, // runs one scalar CMTST and compares V0 with the AArch64 tester; Rd is pinned to 0, so V0 is the destination
+                              [Values(1u, 0u)] uint Rn, // 0 selects Rn == Rd
+                              [Values(2u, 0u)] uint Rm, // 0 selects Rm == Rd
+                              [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, // initial value loaded into V0
+                              [ValueSource("_1D_")] [Random(RndCnt)] ulong A, // initial value loaded into V1
+                              [ValueSource("_1D_")] [Random(RndCnt)] ulong B) // initial value loaded into V2
+        {
+            uint Opcode = 0x5EE08C00; // CMTST D0, D0, D0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // patch the register fields: Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // V0 is seeded with Z for both arguments
             Vector128<float> V1 = MakeVectorE0(A);
             Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed on the AArch64 tester side, which yields the expected values
             AArch64.V(1, new Bits(A));
             AArch64.V(2, new Bits(B));
             SimdFp.Cmtst_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
-            });
-        }
-
-        [Test, Description("CMTST <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmtst_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
-                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
-        {
-            uint Opcode = 0x0E228C20; // CMTST V0.8B, V1.8B, V2.8B
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Cmtst_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // compared with part 0 of the tester's register 0 (argument order presumed: width, reg, part)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // likewise for part 1
             });
         }
 
         [Test, Pairwise, Description("CMTST <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Cmtst_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                         [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
-                                         [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0,
-                                         [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1,
-                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        public void Cmtst_V_8B_4H_2S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [Values(2u, 0u)] uint Rm,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x4E228C20; // CMTST V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x0E208C00; // CMTST V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Cmtst_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -970,46 +1082,57 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("EOR <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Eor_V_8B([ValueSource("_8B_")] [Random(1)] ulong A,
-                             [ValueSource("_8B_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("CMTST <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Pairwise (NUnit): covers every pair of parameter values rather than the full cross product
+        public void Cmtst_V_16B_8H_4S_2D([Values(0u)] uint Rd, // runs one vector CMTST and compares V0 with the AArch64 tester; Rd is pinned to 0, so V0 is the destination
+                                         [Values(1u, 0u)] uint Rn, // 0 selects Rn == Rd
+                                         [Values(2u, 0u)] uint Rm, // 0 selects Rm == Rd
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // initial value loaded into V0
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // initial value loaded into V1
+                                         [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, // initial value loaded into V2
+                                         [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
         {
-            uint Opcode = 0x2E221C20; // EOR V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x4E208C00; // CMTST V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // patch the register fields: Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // patch the size field: bits 23:22
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // V0 is seeded with Z for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Eor_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed on the AArch64 tester side, which yields the expected values
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Cmtst_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments are sliced from the encoded opcode
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // compared with part 0 of the tester's register 0 (argument order presumed: width, reg, part)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // likewise for part 1
             });
         }
 
         [Test, Pairwise, Description("EOR <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Eor_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0,
-                              [ValueSource("_8B_")] [Random(1)] ulong A1,
-                              [ValueSource("_8B_")] [Random(1)] ulong B0,
-                              [ValueSource("_8B_")] [Random(1)] ulong B1)
+        public void Eor_V_8B([Values(0u)] uint Rd,
+                             [Values(1u, 0u)] uint Rn,
+                             [Values(2u, 0u)] uint Rm,
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong Z,
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong A,
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong B)
         {
-            uint Opcode = 0x6E221C20; // EOR V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x2E201C00; // EOR V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Eor_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1019,46 +1142,55 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("ORN <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Orn_V_8B([ValueSource("_8B_")] [Random(1)] ulong A,
-                             [ValueSource("_8B_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("EOR <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Pairwise (NUnit): covers every pair of parameter values rather than the full cross product
+        public void Eor_V_16B([Values(0u)] uint Rd, // runs one EOR and compares V0 with the AArch64 tester; Rd is pinned to 0, so V0 is the destination
+                              [Values(1u, 0u)] uint Rn, // 0 selects Rn == Rd
+                              [Values(2u, 0u)] uint Rm, // 0 selects Rm == Rd
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial value loaded into V0
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial value loaded into V1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial value loaded into V2
         {
-            uint Opcode = 0x0EE21C20; // ORN V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x6E201C00; // EOR V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // patch the register fields: Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // V0 is seeded with Z for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Orn_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed on the AArch64 tester side, which yields the expected values
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Eor_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments are sliced from the encoded opcode
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // compared with part 0 of the tester's register 0 (argument order presumed: width, reg, part)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // likewise for part 1
             });
         }
 
         [Test, Pairwise, Description("ORN <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Orn_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0,
-                              [ValueSource("_8B_")] [Random(1)] ulong A1,
-                              [ValueSource("_8B_")] [Random(1)] ulong B0,
-                              [ValueSource("_8B_")] [Random(1)] ulong B1)
+        public void Orn_V_8B([Values(0u)] uint Rd,
+                             [Values(1u, 0u)] uint Rn,
+                             [Values(2u, 0u)] uint Rm,
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong Z,
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong A,
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong B)
         {
-            uint Opcode = 0x4EE21C20; // ORN V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x0EE01C00; // ORN V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
             SimdFp.Orn_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1068,46 +1200,84 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Orr_V_8B([ValueSource("_8B_")] [Random(1)] ulong A,
-                             [ValueSource("_8B_")] [Random(1)] ulong B)
+        [Test, Pairwise, Description("ORN <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Pairwise (NUnit): covers every pair of parameter values rather than the full cross product
+        public void Orn_V_16B([Values(0u)] uint Rd, // runs one ORN and compares V0 with the AArch64 tester; Rd is pinned to 0, so V0 is the destination
+                              [Values(1u, 0u)] uint Rn, // 0 selects Rn == Rd
+                              [Values(2u, 0u)] uint Rm, // 0 selects Rm == Rd
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial value loaded into V0
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial value loaded into V1
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial value loaded into V2
         {
-            uint Opcode = 0x0EA21C20; // ORR V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x4EE01C00; // ORN V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // patch the register fields: Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // V0 is seeded with Z for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Orr_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed on the AArch64 tester side, which yields the expected values
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Orn_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments are sliced from the encoded opcode
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // compared with part 0 of the tester's register 0 (argument order presumed: width, reg, part)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // likewise for part 1
             });
         }
 
         [Test, Pairwise, Description("ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Orr_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0,
-                              [ValueSource("_8B_")] [Random(1)] ulong A1,
-                              [ValueSource("_8B_")] [Random(1)] ulong B0,
-                              [ValueSource("_8B_")] [Random(1)] ulong B1)
+        public void Orr_V_8B([Values(0u)] uint Rd, // runs one ORR and compares V0 with the AArch64 tester; Rd is pinned to 0, so V0 is the destination
+                             [Values(1u, 0u)] uint Rn, // 0 selects Rn == Rd
+                             [Values(2u, 0u)] uint Rm, // 0 selects Rm == Rd
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, // initial value loaded into V0
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong A, // initial value loaded into V1
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong B) // initial value loaded into V2
         {
-            uint Opcode = 0x4EA21C20; // ORR V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x0EA01C00; // ORR V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // patch the register fields: Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // V0 is seeded with Z for both arguments
+            Vector128<float> V1 = MakeVectorE0(A); // single-argument helper: only A is supplied for V1
+            Vector128<float> V2 = MakeVectorE0(B); // single-argument helper: only B is supplied for V2
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed on the AArch64 tester side, which yields the expected values
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            SimdFp.Orr_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments are sliced from the encoded opcode
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // compared with part 0 of the tester's register 0 (argument order presumed: width, reg, part)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // likewise for part 1
+            });
+        }
+
+        [Test, Pairwise, Description("ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Orr_V_16B([Values(0u)] uint Rd,
+                              [Values(1u, 0u)] uint Rn,
+                              [Values(2u, 0u)] uint Rm,
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong Z,
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong A,
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong B)
+        {
+            uint Opcode = 0x4EA01C00; // ORR V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Orr_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1118,174 +1288,182 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise, Description("RADDHN{2} <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta>")]
-        public void Raddhn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong B0,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong B1,
+        public void Raddhn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, // runs one RADDHN and compares V0 with the AArch64 tester; Rd is pinned to 0, so V0 is the destination
+                                            [Values(1u, 0u)] uint Rn, // 0 selects Rn == Rd
+                                            [Values(2u, 0u)] uint Rm, // 0 selects Rm == Rd
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, // initial value loaded into V0
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, // initial value loaded into V1
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, // initial value loaded into V2
                                             [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S>
         {
-            uint Opcode = 0x2E224020; // RADDHN V0.8B, V1.8H, V2.8H
+            uint Opcode = 0x2E204000; // RADDHN V0.8B, V0.8H, V0.8H
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // patch the register fields: Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // V0 is seeded with Z for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed on the AArch64 tester side, which yields the expected values
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Raddhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // compared with part 0 of the tester's register 0 (argument order presumed: width, reg, part)
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // likewise for part 1
             });
         }
 
         [Test, Pairwise, Description("RADDHN{2} <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta>")]
-        public void Raddhn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                             [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
-                                             [ValueSource("_4H2S1D_")] [Random(1)] ulong B0,
-                                             [ValueSource("_4H2S1D_")] [Random(1)] ulong B1,
+        public void Raddhn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, // runs one RADDHN2 and compares V0 with the AArch64 tester; Rd is pinned to 0, so V0 is the destination
+                                             [Values(1u, 0u)] uint Rn, // 0 selects Rn == Rd
+                                             [Values(2u, 0u)] uint Rm, // 0 selects Rm == Rd
+                                             [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, // initial value loaded into V0
+                                             [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, // initial value loaded into V1
+                                             [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, // initial value loaded into V2
                                              [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S>
         {
-            uint Opcode = 0x6E224020; // RADDHN2 V0.16B, V1.8H, V2.8H
+            uint Opcode = 0x6E204000; // RADDHN2 V0.16B, V0.8H, V0.8H
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // patch the register fields: Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            ulong _E0 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> V0 = MakeVectorE0(_E0);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // V0 is seeded with Z for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed on the AArch64 tester side, which yields the expected values
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Raddhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // compared with part 0 of the tester's register 0 (argument order presumed: width, reg, part)
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Pairwise, Description("RSUBHN{2} <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta>")]
-        public void Rsubhn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong B0,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong B1,
+        public void Rsubhn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, // runs one RSUBHN and compares V0 with the AArch64 tester; Rd is pinned to 0, so V0 is the destination
+                                            [Values(1u, 0u)] uint Rn, // 0 selects Rn == Rd
+                                            [Values(2u, 0u)] uint Rm, // 0 selects Rm == Rd
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, // initial value loaded into V0
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, // initial value loaded into V1
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, // initial value loaded into V2
                                             [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S>
         {
-            uint Opcode = 0x2E226020; // RSUBHN V0.8B, V1.8H, V2.8H
+            uint Opcode = 0x2E206000; // RSUBHN V0.8B, V0.8H, V0.8H
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // patch the register fields: Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // V0 is seeded with Z for both arguments
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed on the AArch64 tester side, which yields the expected values
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Rsubhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
-            });
-        }
-
-        [Test, Pairwise, Description("RSUBHN{2} <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta>")]
-        public void Rsubhn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                             [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
-                                             [ValueSource("_4H2S1D_")] [Random(1)] ulong B0,
-                                             [ValueSource("_4H2S1D_")] [Random(1)] ulong B1,
-                                             [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S>
-        {
-            uint Opcode = 0x6E226020; // RSUBHN2 V0.16B, V1.8H, V2.8H
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            ulong _E0 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> V0 = MakeVectorE0(_E0);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
-            SimdFp.Rsubhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // compared with part 0 of the tester's register 0 (argument order presumed: width, reg, part)
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
-        [Test, Description("SABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Saba_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
-                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        [Test, Pairwise, Description("RSUBHN{2} <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta>")]
+        public void Rsubhn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd,
+                                             [Values(1u, 0u)] uint Rn,
+                                             [Values(2u, 0u)] uint Rm,
+                                             [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                             [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A,
+                                             [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B,
+                                             [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S>
         {
-            uint Opcode = 0x0E227C20; // SABA V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x6E206000; // RSUBHN2 V0.16B, V0.8H, V0.8H
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z));
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Saba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Rsubhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Pairwise, Description("SABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Saba_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong B0,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong B1,
-                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
+        public void Saba_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [Values(2u, 0u)] uint Rm,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x4E227C20; // SABA V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x0E207C00; // SABA V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z0));
-            AArch64.Vpart(0, 1, new Bits(_Z1));
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            SimdFp.Saba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("SABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Saba_V_16B_8H_4S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [Values(2u, 0u)] uint Rm,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
+        {
+            uint Opcode = 0x4E207C00; // SABA V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Saba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1296,25 +1474,27 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise, Description("SABAL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Sabal_V_8B8H_4H4S_2S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0,
-                                           [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1,
-                                           [ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                           [ValueSource("_8B4H2S_")] [Random(1)] ulong B0,
-                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        public void Sabal_V_8B8H_4H4S_2S2D([Values(0u)] uint Rd,
+                                           [Values(1u, 0u)] uint Rn,
+                                           [Values(2u, 0u)] uint Rm,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D>
         {
-            uint Opcode = 0x0E225020; // SABAL V0.8H, V1.8B, V2.8B
+            uint Opcode = 0x0E205000; // SABAL V0.8H, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
-            Vector128<float> V1 = MakeVectorE0(A0);
-            Vector128<float> V2 = MakeVectorE0(B0);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z0));
-            AArch64.Vpart(0, 1, new Bits(_Z1));
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(2, 0, new Bits(B0));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B));
             SimdFp.Sabal_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1325,25 +1505,27 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise, Description("SABAL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Sabal_V_16B8H_8H4S_4S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0,
-                                            [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1,
-                                            [ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
-                                            [ValueSource("_8B4H2S_")] [Random(1)] ulong B1,
-                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
+        public void Sabal_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd,
+                                            [Values(1u, 0u)] uint Rn,
+                                            [Values(2u, 0u)] uint Rm,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D>
         {
-            uint Opcode = 0x4E225020; // SABAL2 V0.8H, V1.16B, V2.16B
+            uint Opcode = 0x4E205000; // SABAL2 V0.8H, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
-            Vector128<float> V1 = MakeVectorE1(A1);
-            Vector128<float> V2 = MakeVectorE1(B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE1(A);
+            Vector128<float> V2 = MakeVectorE1(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z0));
-            AArch64.Vpart(0, 1, new Bits(_Z1));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Sabal_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1353,53 +1535,59 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("SABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Sabd_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
+        [Test, Pairwise, Description("SABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Sabd_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [Values(2u, 0u)] uint Rm,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x0E227420; // SABD V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x0E207400; // SABD V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             AArch64.V(2, new Bits(B));
             SimdFp.Sabd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Pairwise, Description("SABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Sabd_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong B0,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong B1,
+        public void Sabd_V_16B_8H_4S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [Values(2u, 0u)] uint Rm,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
                                      [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
         {
-            uint Opcode = 0x4E227420; // SABD V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x4E207400; // SABD V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Sabd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1409,125 +1597,150 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("SABDL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Sabdl_V_8B8H_4H4S_2S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                           [ValueSource("_8B4H2S_")] [Random(1)] ulong B0,
-                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        [Test, Pairwise, Description("SABDL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
+        public void Sabdl_V_8B8H_4H4S_2S2D([Values(0u)] uint Rd,
+                                           [Values(1u, 0u)] uint Rn,
+                                           [Values(2u, 0u)] uint Rm,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D>
         {
-            uint Opcode = 0x0E227020; // SABDL V0.8H, V1.8B, V2.8B
+            uint Opcode = 0x0E207000; // SABDL V0.8H, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A0);
-            Vector128<float> V2 = MakeVectorE0(B0);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            SimdFp.Sabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
-            });
-        }
-
-        [Test, Description("SABDL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Sabdl_V_16B8H_8H4S_4S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
-                                            [ValueSource("_8B4H2S_")] [Random(1)] ulong B1,
-                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
-        {
-            uint Opcode = 0x4E227020; // SABDL2 V0.8H, V1.16B, V2.16B
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE1(A1);
-            Vector128<float> V2 = MakeVectorE1(B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 1, new Bits(B1));
-            SimdFp.Sabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
-            });
-        }
-
-        [Test, Description("SUB <V><d>, <V><n>, <V><m>")]
-        public void Sub_S_D([ValueSource("_1D_")] [Random(1)] ulong A,
-                            [ValueSource("_1D_")] [Random(1)] ulong B)
-        {
-            uint Opcode = 0x7EE28420; // SUB D0, D1, D2
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
             Vector128<float> V1 = MakeVectorE0(A);
             Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B));
+            SimdFp.Sabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("SABDL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
+        public void Sabdl_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd,
+                                            [Values(1u, 0u)] uint Rn,
+                                            [Values(2u, 0u)] uint Rm,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D>
+        {
+            uint Opcode = 0x4E207000; // SABDL2 V0.8H, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE1(A);
+            Vector128<float> V2 = MakeVectorE1(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Sabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("SUB <V><d>, <V><n>, <V><m>")]
+        public void Sub_S_D([Values(0u)] uint Rd,
+                            [Values(1u, 0u)] uint Rn,
+                            [Values(2u, 0u)] uint Rm,
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong Z,
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong A,
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong B)
+        {
+            uint Opcode = 0x7EE08400; // SUB D0, D0, D0
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
             AArch64.V(1, new Bits(A));
             AArch64.V(2, new Bits(B));
             SimdFp.Sub_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
-            });
-        }
-
-        [Test, Description("SUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Sub_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                   [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
-                                   [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
-        {
-            uint Opcode = 0x2E228420; // SUB V0.8B, V1.8B, V2.8B
-            Opcode |= ((size & 3) << 22);
-            Bits Op = new Bits(Opcode);
-
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A);
-            Vector128<float> V2 = MakeVectorE0(B);
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
-            AArch64.V(1, new Bits(A));
-            AArch64.V(2, new Bits(B));
-            SimdFp.Sub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
         [Test, Pairwise, Description("SUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Sub_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0,
-                                       [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1,
-                                       [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0,
-                                       [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1,
-                                       [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        public void Sub_V_8B_4H_2S([Values(0u)] uint Rd,
+                                   [Values(1u, 0u)] uint Rn,
+                                   [Values(2u, 0u)] uint Rm,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                   [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x6E228420; // SUB V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x2E208400; // SUB V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            SimdFp.Sub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("SUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Sub_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                       [Values(1u, 0u)] uint Rn,
+                                       [Values(2u, 0u)] uint Rm,
+                                       [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                       [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                       [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B,
+                                       [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x6E208400; // SUB V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Sub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1538,115 +1751,244 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise, Description("SUBHN{2} <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta>")]
-        public void Subhn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                           [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
-                                           [ValueSource("_4H2S1D_")] [Random(1)] ulong B0,
-                                           [ValueSource("_4H2S1D_")] [Random(1)] ulong B1,
+        public void Subhn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, // Rd is always 0; the asserts below read V0
+                                           [Values(1u, 0u)] uint Rn, // 0u makes Rn the same register as Rd
+                                           [Values(2u, 0u)] uint Rm, // 0u makes Rm the same register as Rd
+                                           [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, // initial V0 contents
+                                           [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, // initial V1 contents
+                                           [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, // initial V2 contents
                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S>
         {
-            uint Opcode = 0x0E226020; // SUBHN V0.8B, V1.8H, V2.8H
+            uint Opcode = 0x0E206000; // SUBHN V0.8B, V0.8H, V0.8H
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z in both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // A in both E0 and E1
+            Vector128<float> V2 = MakeVectorE0E1(B, B); // B in both E0 and E1
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed for the AArch64/SimdFp side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same V1 seed
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); // same V2 seed
             SimdFp.Subhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // V0.E0 after SingleOpcode vs. AArch64-side value
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // V0.E1 after SingleOpcode vs. AArch64-side value
             });
         }
 
         [Test, Pairwise, Description("SUBHN{2} <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta>")]
-        public void Subhn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong A1,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong B0,
-                                            [ValueSource("_4H2S1D_")] [Random(1)] ulong B1,
+        public void Subhn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, // Rd is always 0; the asserts below read V0
+                                            [Values(1u, 0u)] uint Rn, // 0u makes Rn the same register as Rd
+                                            [Values(2u, 0u)] uint Rm, // 0u makes Rm the same register as Rd
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, // initial V0 contents
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, // initial V1 contents
+                                            [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, // initial V2 contents
                                             [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S>
         {
-            uint Opcode = 0x4E226020; // SUBHN2 V0.16B, V1.8H, V2.8H
+            uint Opcode = 0x4E206000; // SUBHN2 V0.16B, V0.8H, V0.8H
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            ulong _E0 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> V0 = MakeVectorE0(_E0);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z in both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // A in both E0 and E1
+            Vector128<float> V2 = MakeVectorE0E1(B, B); // B in both E0 and E1
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed for the AArch64/SimdFp side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same V1 seed
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); // same V2 seed
             SimdFp.Subhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // V0.E0 after SingleOpcode vs. AArch64-side value
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
 
-        [Test, Description("UABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Uaba_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
+        [Test, Pairwise, Description("TRN1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Pairwise: cover every pair of argument values, not the full cross product
+        public void Trn1_V_8B_4H_2S([Values(0u)] uint Rd, // Rd is always 0; the asserts below read V0
+                                    [Values(1u, 0u)] uint Rn, // 0u makes Rn the same register as Rd
+                                    [Values(2u, 0u)] uint Rm, // 0u makes Rm the same register as Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, // initial V0 contents
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, // initial V1 contents
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, // initial V2 contents
                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x2E227C20; // UABA V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x0E002800; // TRN1 V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z in both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed for the AArch64/SimdFp side
             AArch64.V(1, new Bits(A));
             AArch64.V(2, new Bits(B));
-            SimdFp.Uaba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
+            SimdFp.Trn1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // pass bit 30 plus the size, Rm, Rn and Rd fields
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // V0.E0 after SingleOpcode vs. AArch64-side value
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // V0.E1 after SingleOpcode vs. AArch64-side value
+            });
+        }
+
+        [Test, Pairwise, Description("TRN1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Pairwise: cover every pair of argument values, not the full cross product
+        public void Trn1_V_16B_8H_4S_2D([Values(0u)] uint Rd, // Rd is always 0; the asserts below read V0
+                                        [Values(1u, 0u)] uint Rn, // 0u makes Rn the same register as Rd
+                                        [Values(2u, 0u)] uint Rm, // 0u makes Rm the same register as Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // initial V0 contents
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // initial V1 contents
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, // initial V2 contents
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x4E002800; // TRN1 V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // size -> bits 23:22
+            Bits Op = new Bits(Opcode); // bit-addressable copy, sliced below for the SimdFp call
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z in both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // A in both E0 and E1
+            Vector128<float> V2 = MakeVectorE0E1(B, B); // B in both E0 and E1
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed for the AArch64/SimdFp side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same V1 seed
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); // same V2 seed
+            SimdFp.Trn1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // pass bit 30 plus the size, Rm, Rn and Rd fields
+
+            Assert.Multiple(() => // NUnit: collect every failed assert in the block before reporting
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // V0.E0 after SingleOpcode vs. AArch64-side value
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // V0.E1 after SingleOpcode vs. AArch64-side value
+            });
+        }
+
+        [Test, Pairwise, Description("TRN2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Pairwise: cover every pair of argument values, not the full cross product
+        public void Trn2_V_8B_4H_2S([Values(0u)] uint Rd, // Rd is always 0; the asserts below read V0
+                                    [Values(1u, 0u)] uint Rn, // 0u makes Rn the same register as Rd
+                                    [Values(2u, 0u)] uint Rm, // 0u makes Rm the same register as Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, // initial V0 contents
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, // initial V1 contents
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, // initial V2 contents
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x0E006800; // TRN2 V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // size -> bits 23:22
+            Bits Op = new Bits(Opcode); // bit-addressable copy, sliced below for the SimdFp call
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z in both E0 and E1
+            Vector128<float> V1 = MakeVectorE0(A); // A goes into E0 (E1 presumably zero - confirm in MakeVectorE0)
+            Vector128<float> V2 = MakeVectorE0(B); // B goes into E0 (E1 presumably zero - confirm in MakeVectorE0)
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed for the AArch64/SimdFp side
+            AArch64.V(1, new Bits(A)); // register 1 on the AArch64 side gets A
+            AArch64.V(2, new Bits(B)); // register 2 on the AArch64 side gets B
+            SimdFp.Trn2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // pass bit 30 plus the size, Rm, Rn and Rd fields
+
+            Assert.Multiple(() => // NUnit: collect every failed assert in the block before reporting
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // V0.E0 after SingleOpcode vs. AArch64-side value
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // V0.E1 after SingleOpcode vs. AArch64-side value
+            });
+        }
+
+        [Test, Pairwise, Description("TRN2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Pairwise: cover every pair of argument values, not the full cross product
+        public void Trn2_V_16B_8H_4S_2D([Values(0u)] uint Rd, // Rd is always 0; the asserts below read V0
+                                        [Values(1u, 0u)] uint Rn, // 0u makes Rn the same register as Rd
+                                        [Values(2u, 0u)] uint Rm, // 0u makes Rm the same register as Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, // initial V0 contents
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, // initial V1 contents
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, // initial V2 contents
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x4E006800; // TRN2 V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // size -> bits 23:22
+            Bits Op = new Bits(Opcode); // bit-addressable copy, sliced below for the SimdFp call
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z in both E0 and E1
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // A in both E0 and E1
+            Vector128<float> V2 = MakeVectorE0E1(B, B); // B in both E0 and E1
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed for the AArch64/SimdFp side
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); // same V1 seed
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); // same V2 seed
+            SimdFp.Trn2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // pass bit 30 plus the size, Rm, Rn and Rd fields
+
+            Assert.Multiple(() => // NUnit: collect every failed assert in the block before reporting
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // V0.E0 after SingleOpcode vs. AArch64-side value
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // V0.E1 after SingleOpcode vs. AArch64-side value
             });
         }
 
         [Test, Pairwise, Description("UABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Uaba_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong B0,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong B1,
-                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
+        public void Uaba_V_8B_4H_2S([Values(0u)] uint Rd, // Rd is always 0; the asserts below read V0
+                                    [Values(1u, 0u)] uint Rn, // 0u makes Rn the same register as Rd
+                                    [Values(2u, 0u)] uint Rm, // 0u makes Rm the same register as Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, // initial V0 contents
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, // initial V1 contents
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, // initial V2 contents
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x6E227C20; // UABA V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x2E207C00; // UABA V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z in both E0 and E1
+            Vector128<float> V1 = MakeVectorE0(A); // A goes into E0 (E1 presumably zero - confirm in MakeVectorE0)
+            Vector128<float> V2 = MakeVectorE0(B); // B goes into E0 (E1 presumably zero - confirm in MakeVectorE0)
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z0));
-            AArch64.Vpart(0, 1, new Bits(_Z1));
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed for the AArch64/SimdFp side
+            AArch64.V(1, new Bits(A)); // register 1 on the AArch64 side gets A
+            AArch64.V(2, new Bits(B)); // register 2 on the AArch64 side gets B
+            SimdFp.Uaba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // pass bit 30 plus the size, Rm, Rn and Rd fields
+
+            Assert.Multiple(() => // NUnit: collect every failed assert in the block before reporting
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // V0.E0 after SingleOpcode vs. AArch64-side value
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // V0.E1 after SingleOpcode vs. AArch64-side value
+            });
+        }
+
+        [Test, Pairwise, Description("UABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
+        public void Uaba_V_16B_8H_4S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [Values(2u, 0u)] uint Rm,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
+        {
+            uint Opcode = 0x6E207C00; // UABA V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Opcode |= ((size & 3) << 22);
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Uaba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1657,25 +1999,27 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise, Description("UABAL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Uabal_V_8B8H_4H4S_2S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0,
-                                           [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1,
-                                           [ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                           [ValueSource("_8B4H2S_")] [Random(1)] ulong B0,
-                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        public void Uabal_V_8B8H_4H4S_2S2D([Values(0u)] uint Rd,
+                                           [Values(1u, 0u)] uint Rn,
+                                           [Values(2u, 0u)] uint Rm,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D>
         {
-            uint Opcode = 0x2E225020; // UABAL V0.8H, V1.8B, V2.8B
+            uint Opcode = 0x2E205000; // UABAL V0.8H, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
-            Vector128<float> V1 = MakeVectorE0(A0);
-            Vector128<float> V2 = MakeVectorE0(B0);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z0));
-            AArch64.Vpart(0, 1, new Bits(_Z1));
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(2, 0, new Bits(B0));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B));
             SimdFp.Uabal_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1686,25 +2030,27 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise, Description("UABAL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Uabal_V_16B8H_8H4S_4S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0,
-                                            [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1,
-                                            [ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
-                                            [ValueSource("_8B4H2S_")] [Random(1)] ulong B1,
-                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
+        public void Uabal_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd,
+                                            [Values(1u, 0u)] uint Rn,
+                                            [Values(2u, 0u)] uint Rm,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D>
         {
-            uint Opcode = 0x6E225020; // UABAL2 V0.8H, V1.16B, V2.16B
+            uint Opcode = 0x6E205000; // UABAL2 V0.8H, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
-            Vector128<float> V1 = MakeVectorE1(A1);
-            Vector128<float> V2 = MakeVectorE1(B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE1(A);
+            Vector128<float> V2 = MakeVectorE1(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(0, 0, new Bits(_Z0));
-            AArch64.Vpart(0, 1, new Bits(_Z1));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Uabal_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1714,53 +2060,59 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("UABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Uabd_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
-                                    [ValueSource("_8B4H2S_")] [Random(1)] ulong B,
+        [Test, Pairwise, Description("UABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Pairwise: cover every pair of argument values, not the full cross product
+        public void Uabd_V_8B_4H_2S([Values(0u)] uint Rd, // Rd is always 0; the asserts below read V0
+                                    [Values(1u, 0u)] uint Rn, // 0u makes Rn the same register as Rd
+                                    [Values(2u, 0u)] uint Rm, // 0u makes Rm the same register as Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, // initial V0 contents
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, // initial V1 contents
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, // initial V2 contents
                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
         {
-            uint Opcode = 0x2E227420; // UABD V0.8B, V1.8B, V2.8B
+            uint Opcode = 0x2E207400; // UABD V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // Z in both E0 and E1
             Vector128<float> V1 = MakeVectorE0(A);
             Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // same V0 seed for the AArch64/SimdFp side
             AArch64.V(1, new Bits(A));
             AArch64.V(2, new Bits(B));
             SimdFp.Uabd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // V0.E0 after SingleOpcode vs. AArch64-side value
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); // V0.E1 after SingleOpcode vs. AArch64-side value
             });
         }
 
         [Test, Pairwise, Description("UABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")]
-        public void Uabd_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong B0,
-                                     [ValueSource("_8B4H2S_")] [Random(1)] ulong B1,
+        public void Uabd_V_16B_8H_4S([Values(0u)] uint Rd,
+                                     [Values(1u, 0u)] uint Rn,
+                                     [Values(2u, 0u)] uint Rm,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                     [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
                                      [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
         {
-            uint Opcode = 0x6E227420; // UABD V0.16B, V1.16B, V2.16B
+            uint Opcode = 0x6E207400; // UABD V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
-            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 0, new Bits(B0));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Uabd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1770,23 +2122,28 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("UABDL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Uabdl_V_8B8H_4H4S_2S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
-                                           [ValueSource("_8B4H2S_")] [Random(1)] ulong B0,
-                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        [Test, Pairwise, Description("UABDL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
+        public void Uabdl_V_8B8H_4H4S_2S2D([Values(0u)] uint Rd,
+                                           [Values(1u, 0u)] uint Rn,
+                                           [Values(2u, 0u)] uint Rm,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D>
         {
-            uint Opcode = 0x2E227020; // UABDL V0.8H, V1.8B, V2.8B
+            uint Opcode = 0x2E207000; // UABDL V0.8H, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE0(A0);
-            Vector128<float> V2 = MakeVectorE0(B0);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 0, new Bits(A0));
-            AArch64.Vpart(2, 0, new Bits(B0));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 0, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B));
             SimdFp.Uabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1796,23 +2153,28 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
-        [Test, Description("UABDL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Uabdl_V_16B8H_8H4S_4S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
-                                            [ValueSource("_8B4H2S_")] [Random(1)] ulong B1,
-                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
+        [Test, Pairwise, Description("UABDL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
+        public void Uabdl_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd,
+                                            [Values(1u, 0u)] uint Rn,
+                                            [Values(2u, 0u)] uint Rm,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D>
         {
-            uint Opcode = 0x6E227020; // UABDL2 V0.8H, V1.16B, V2.16B
+            uint Opcode = 0x6E207000; // UABDL2 V0.8H, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
             Opcode |= ((size & 3) << 22);
             Bits Op = new Bits(Opcode);
 
-            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
-                                                 TestContext.CurrentContext.Random.NextULong());
-            Vector128<float> V1 = MakeVectorE1(A1);
-            Vector128<float> V2 = MakeVectorE1(B1);
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE1(A);
+            Vector128<float> V2 = MakeVectorE1(B);
             AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
 
-            AArch64.Vpart(1, 1, new Bits(A1));
-            AArch64.Vpart(2, 1, new Bits(B1));
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z));
+            AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 1, new Bits(B));
             SimdFp.Uabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]);
 
             Assert.Multiple(() =>
@@ -1821,6 +2183,254 @@ namespace Ryujinx.Tests.Cpu
                 Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
             });
         }
+
+        [Test, Pairwise, Description("UZP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Checks UZP1 with opcode bit 30 clear: V0 from SingleOpcode vs. the SimdFp.Uzp1_V reference.
+        public void Uzp1_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn, // 0u: Rn names the same register as Rd
+                                    [Values(2u, 0u)] uint Rm, // 0u: Rm names the same register as Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x0E001800; // UZP1 V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // size -> bits 23:22
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // presumably fills both 64-bit halves of V0 with Z, so stale destination bits would show up
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); // side under test; the returned state holds the V0 asserted below
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // reference side: the same Z, A, B are loaded into the AArch64 model
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            SimdFp.Uzp1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments: Q, size, Rm, Rn, Rd
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // both 64-bit parts of V0 are compared against the reference
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("UZP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Checks UZP1 with opcode bit 30 set: V0 from SingleOpcode vs. the SimdFp.Uzp1_V reference.
+        public void Uzp1_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                        [Values(1u, 0u)] uint Rn, // 0u: Rn names the same register as Rd
+                                        [Values(2u, 0u)] uint Rm, // 0u: Rm names the same register as Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B,
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x4E001800; // UZP1 V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // size -> bits 23:22
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // presumably fills both 64-bit halves of V0 with Z
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // each source repeats its 64-bit pattern in both halves (presumably E0 and E1)
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); // side under test; the returned state holds the V0 asserted below
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // reference side: the same Z, A, B are loaded into the AArch64 model
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Uzp1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments: Q, size, Rm, Rn, Rd
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // both 64-bit parts of V0 are compared against the reference
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("UZP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Checks UZP2 with opcode bit 30 clear: V0 from SingleOpcode vs. the SimdFp.Uzp2_V reference.
+        public void Uzp2_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn, // 0u: Rn names the same register as Rd
+                                    [Values(2u, 0u)] uint Rm, // 0u: Rm names the same register as Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x0E005800; // UZP2 V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // size -> bits 23:22
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // presumably fills both 64-bit halves of V0 with Z, so stale destination bits would show up
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); // side under test; the returned state holds the V0 asserted below
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // reference side: the same Z, A, B are loaded into the AArch64 model
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            SimdFp.Uzp2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments: Q, size, Rm, Rn, Rd
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // both 64-bit parts of V0 are compared against the reference
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("UZP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Checks UZP2 with opcode bit 30 set: V0 from SingleOpcode vs. the SimdFp.Uzp2_V reference.
+        public void Uzp2_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                        [Values(1u, 0u)] uint Rn, // 0u: Rn names the same register as Rd
+                                        [Values(2u, 0u)] uint Rm, // 0u: Rm names the same register as Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B,
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x4E005800; // UZP2 V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // size -> bits 23:22
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // presumably fills both 64-bit halves of V0 with Z
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // each source repeats its 64-bit pattern in both halves (presumably E0 and E1)
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); // side under test; the returned state holds the V0 asserted below
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // reference side: the same Z, A, B are loaded into the AArch64 model
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Uzp2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments: Q, size, Rm, Rn, Rd
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // both 64-bit parts of V0 are compared against the reference
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("ZIP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Checks ZIP1 with opcode bit 30 clear: V0 from SingleOpcode vs. the SimdFp.Zip1_V reference.
+        public void Zip1_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn, // 0u: Rn names the same register as Rd
+                                    [Values(2u, 0u)] uint Rm, // 0u: Rm names the same register as Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x0E003800; // ZIP1 V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // size -> bits 23:22
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // presumably fills both 64-bit halves of V0 with Z, so stale destination bits would show up
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); // side under test; the returned state holds the V0 asserted below
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // reference side: the same Z, A, B are loaded into the AArch64 model
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            SimdFp.Zip1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments: Q, size, Rm, Rn, Rd
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // both 64-bit parts of V0 are compared against the reference
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("ZIP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Checks ZIP1 with opcode bit 30 set: V0 from SingleOpcode vs. the SimdFp.Zip1_V reference.
+        public void Zip1_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                        [Values(1u, 0u)] uint Rn, // 0u: Rn names the same register as Rd
+                                        [Values(2u, 0u)] uint Rm, // 0u: Rm names the same register as Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B,
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x4E003800; // ZIP1 V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // size -> bits 23:22
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // presumably fills both 64-bit halves of V0 with Z
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // each source repeats its 64-bit pattern in both halves (presumably E0 and E1)
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); // side under test; the returned state holds the V0 asserted below
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // reference side: the same Z, A, B are loaded into the AArch64 model
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Zip1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments: Q, size, Rm, Rn, Rd
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // both 64-bit parts of V0 are compared against the reference
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("ZIP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Checks ZIP2 with opcode bit 30 clear: V0 from SingleOpcode vs. the SimdFp.Zip2_V reference.
+        public void Zip2_V_8B_4H_2S([Values(0u)] uint Rd,
+                                    [Values(1u, 0u)] uint Rn, // 0u: Rn names the same register as Rd
+                                    [Values(2u, 0u)] uint Rm, // 0u: Rm names the same register as Rd
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+                                    [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B,
+                                    [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x0E007800; // ZIP2 V0.8B, V0.8B, V0.8B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // size -> bits 23:22
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // presumably fills both 64-bit halves of V0 with Z, so stale destination bits would show up
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); // side under test; the returned state holds the V0 asserted below
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // reference side: the same Z, A, B are loaded into the AArch64 model
+            AArch64.V(1, new Bits(A));
+            AArch64.V(2, new Bits(B));
+            SimdFp.Zip2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments: Q, size, Rm, Rn, Rd
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // both 64-bit parts of V0 are compared against the reference
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Pairwise, Description("ZIP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T>")] // Checks ZIP2 with opcode bit 30 set: V0 from SingleOpcode vs. the SimdFp.Zip2_V reference.
+        public void Zip2_V_16B_8H_4S_2D([Values(0u)] uint Rd,
+                                        [Values(1u, 0u)] uint Rn, // 0u: Rn names the same register as Rd
+                                        [Values(2u, 0u)] uint Rm, // 0u: Rm names the same register as Rd
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z,
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A,
+                                        [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B,
+                                        [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
+        {
+            uint Opcode = 0x4E007800; // ZIP2 V0.16B, V0.16B, V0.16B
+            Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm -> bits 20:16, Rn -> bits 9:5, Rd -> bits 4:0
+            Opcode |= ((size & 3) << 22); // size -> bits 23:22
+            Bits Op = new Bits(Opcode);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // presumably fills both 64-bit halves of V0 with Z
+            Vector128<float> V1 = MakeVectorE0E1(A, A); // each source repeats its 64-bit pattern in both halves (presumably E0 and E1)
+            Vector128<float> V2 = MakeVectorE0E1(B, B);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); // side under test; the returned state holds the V0 asserted below
+
+            AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); // reference side: the same Z, A, B are loaded into the AArch64 model
+            AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A));
+            AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B));
+            SimdFp.Zip2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); // arguments: Q, size, Rm, Rn, Rd
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); // both 64-bit parts of V0 are compared against the reference
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
 #endif
     }
 }
diff --git a/Ryujinx.Tests/Cpu/Tester/Instructions.cs b/Ryujinx.Tests/Cpu/Tester/Instructions.cs
index 1590019a70..68f83423ba 100644
--- a/Ryujinx.Tests/Cpu/Tester/Instructions.cs
+++ b/Ryujinx.Tests/Cpu/Tester/Instructions.cs
@@ -4655,6 +4655,74 @@ namespace Ryujinx.Tests.Cpu.Tester
             Vpart(d, part, result);
         }
 
+        // trn1_advsimd.html -- TRN1 reference model: result[2p] comes from V[n] and result[2p+1] from V[m], both read at index 2p+part.
+        public static void Trn1_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool op = false; // fixed for TRN1; converted to the element offset `part` below (presumably 0 for false)
+
+            /* Decode */
+            int d = (int)UInt(Rd); // register numbers taken from the Rd / Rn / Rm fields
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            /* if size:Q == '110' then ReservedValue(); */ // reserved-encoding check is commented out, so it is not enforced here
+
+            int esize = 8 << (int)UInt(size); // element width in bits: 8 shifted left by the size field
+            int datasize = (Q ? 128 : 64); // vector width in bits, selected by Q
+            int elements = datasize / esize; // elements per vector
+            int part = (int)UInt(op);
+            int pairs = elements / 2; // the loop below writes two result elements per pair
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n); // both sources are read before V(d, ...) is written, so d may equal n or m
+            Bits operand2 = V(datasize, m);
+
+            for (int p = 0; p <= pairs - 1; p++)
+            {
+                Elem(result, 2 * p + 0, esize, Elem(operand1, 2 * p + part, esize)); // lower slot of the pair <- operand1
+                Elem(result, 2 * p + 1, esize, Elem(operand2, 2 * p + part, esize)); // upper slot of the pair <- operand2, same source index
+            }
+
+            V(d, result); // store the assembled vector to register d
+        }
+
+        // trn2_advsimd.html -- TRN2 reference model: result[2p] comes from V[n] and result[2p+1] from V[m], both read at index 2p+part.
+        public static void Trn2_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool op = true; // fixed for TRN2; converted to the element offset `part` below (presumably 1 for true)
+
+            /* Decode */
+            int d = (int)UInt(Rd); // register numbers taken from the Rd / Rn / Rm fields
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            /* if size:Q == '110' then ReservedValue(); */ // reserved-encoding check is commented out, so it is not enforced here
+
+            int esize = 8 << (int)UInt(size); // element width in bits: 8 shifted left by the size field
+            int datasize = (Q ? 128 : 64); // vector width in bits, selected by Q
+            int elements = datasize / esize; // elements per vector
+            int part = (int)UInt(op);
+            int pairs = elements / 2; // the loop below writes two result elements per pair
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n); // both sources are read before V(d, ...) is written, so d may equal n or m
+            Bits operand2 = V(datasize, m);
+
+            for (int p = 0; p <= pairs - 1; p++)
+            {
+                Elem(result, 2 * p + 0, esize, Elem(operand1, 2 * p + part, esize)); // lower slot of the pair <- operand1
+                Elem(result, 2 * p + 1, esize, Elem(operand2, 2 * p + part, esize)); // upper slot of the pair <- operand2, same source index
+            }
+
+            V(d, result); // store the assembled vector to register d
+        }
+
         // uaba_advsimd.html
         public static void Uaba_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
         {
@@ -4832,6 +4900,146 @@ namespace Ryujinx.Tests.Cpu.Tester
 
             V(d, result);
         }
+
+        // uzp1_advsimd.html -- UZP1 reference model: result[e] is element 2e+part of the concatenation of V[m] and V[n].
+        public static void Uzp1_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool op = false; // fixed for UZP1; converted to the element offset `part` below (presumably 0 for false)
+
+            /* Decode */
+            int d = (int)UInt(Rd); // register numbers taken from the Rd / Rn / Rm fields
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            /* if size:Q == '110' then ReservedValue(); */ // reserved-encoding check is commented out, so it is not enforced here
+
+            int esize = 8 << (int)UInt(size); // element width in bits: 8 shifted left by the size field
+            int datasize = (Q ? 128 : 64); // vector width in bits, selected by Q
+            int elements = datasize / esize; // elements per vector
+            int part = (int)UInt(op);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operandl = V(datasize, n); // both sources are read before V(d, ...) is written, so d may equal n or m
+            Bits operandh = V(datasize, m);
+
+            Bits zipped = Bits.Concat(operandh, operandl); // operandh:operandl -- presumably operandl ends up in the low-numbered elements; confirm against Bits.Concat
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                Elem(result, e, esize, Elem(zipped, 2 * e + part, esize)); // keep every second element of zipped, starting at index part
+            }
+
+            V(d, result); // store the assembled vector to register d
+        }
+
+        // uzp2_advsimd.html -- UZP2 reference model: result[e] is element 2e+part of the concatenation of V[m] and V[n].
+        public static void Uzp2_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool op = true; // fixed for UZP2; converted to the element offset `part` below (presumably 1 for true)
+
+            /* Decode */
+            int d = (int)UInt(Rd); // register numbers taken from the Rd / Rn / Rm fields
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            /* if size:Q == '110' then ReservedValue(); */ // reserved-encoding check is commented out, so it is not enforced here
+
+            int esize = 8 << (int)UInt(size); // element width in bits: 8 shifted left by the size field
+            int datasize = (Q ? 128 : 64); // vector width in bits, selected by Q
+            int elements = datasize / esize; // elements per vector
+            int part = (int)UInt(op);
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operandl = V(datasize, n); // both sources are read before V(d, ...) is written, so d may equal n or m
+            Bits operandh = V(datasize, m);
+
+            Bits zipped = Bits.Concat(operandh, operandl); // operandh:operandl -- presumably operandl ends up in the low-numbered elements; confirm against Bits.Concat
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                Elem(result, e, esize, Elem(zipped, 2 * e + part, esize)); // keep every second element of zipped, starting at index part
+            }
+
+            V(d, result); // store the assembled vector to register d
+        }
+
+        // zip1_advsimd.html -- ZIP1 reference model: interleaves V[n] and V[m] elements base..base+pairs-1 into result (V[n] first in each pair).
+        public static void Zip1_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool op = false; // fixed for ZIP1; converted to `part` below (presumably 0 for false)
+
+            /* Decode */
+            int d = (int)UInt(Rd); // register numbers taken from the Rd / Rn / Rm fields
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            /* if size:Q == '110' then ReservedValue(); */ // reserved-encoding check is commented out, so it is not enforced here
+
+            int esize = 8 << (int)UInt(size); // element width in bits: 8 shifted left by the size field
+            int datasize = (Q ? 128 : 64); // vector width in bits, selected by Q
+            int elements = datasize / esize; // elements per vector
+            int part = (int)UInt(op);
+            int pairs = elements / 2; // the loop below writes two result elements per pair
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n); // both sources are read before V(d, ...) is written, so d may equal n or m
+            Bits operand2 = V(datasize, m);
+
+            int @base = part * pairs; // index of the first source element consumed: 0 when part is 0, pairs when part is 1
+
+            for (int p = 0; p <= pairs - 1; p++)
+            {
+                Elem(result, 2 * p + 0, esize, Elem(operand1, @base + p, esize)); // lower slot of the pair <- operand1[base + p]
+                Elem(result, 2 * p + 1, esize, Elem(operand2, @base + p, esize)); // upper slot of the pair <- operand2[base + p]
+            }
+
+            V(d, result); // store the assembled vector to register d
+        }
+
+        // zip2_advsimd.html -- ZIP2 reference model: interleaves V[n] and V[m] elements base..base+pairs-1 into result (V[n] first in each pair).
+        public static void Zip2_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
+        {
+            const bool op = true; // fixed for ZIP2; converted to `part` below (presumably 1 for true)
+
+            /* Decode */
+            int d = (int)UInt(Rd); // register numbers taken from the Rd / Rn / Rm fields
+            int n = (int)UInt(Rn);
+            int m = (int)UInt(Rm);
+
+            /* if size:Q == '110' then ReservedValue(); */ // reserved-encoding check is commented out, so it is not enforced here
+
+            int esize = 8 << (int)UInt(size); // element width in bits: 8 shifted left by the size field
+            int datasize = (Q ? 128 : 64); // vector width in bits, selected by Q
+            int elements = datasize / esize; // elements per vector
+            int part = (int)UInt(op);
+            int pairs = elements / 2; // the loop below writes two result elements per pair
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand1 = V(datasize, n); // both sources are read before V(d, ...) is written, so d may equal n or m
+            Bits operand2 = V(datasize, m);
+
+            int @base = part * pairs; // index of the first source element consumed: 0 when part is 0, pairs when part is 1
+
+            for (int p = 0; p <= pairs - 1; p++)
+            {
+                Elem(result, 2 * p + 0, esize, Elem(operand1, @base + p, esize)); // lower slot of the pair <- operand1[base + p]
+                Elem(result, 2 * p + 1, esize, Elem(operand2, @base + p, esize)); // upper slot of the pair <- operand2[base + p]
+            }
+
+            V(d, result); // store the assembled vector to register d
+        }
 #endregion
     }
 }