diff --git a/ChocolArm64/Instructions/InstEmitSimdMove.cs b/ChocolArm64/Instructions/InstEmitSimdMove.cs
index 841dcfe790..647a2238a6 100644
--- a/ChocolArm64/Instructions/InstEmitSimdMove.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdMove.cs
@@ -220,45 +220,37 @@ namespace ChocolArm64.Instructions
 
         public static void Fmov_Ftoi(ILEmitterCtx context)
         {
-            OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;
+            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; // Decoded as OpCodeSimd64 so that op.Size is available below.
 
-            EmitVectorExtractZx(context, op.Rn, 0, 3);
-
-            EmitIntZeroUpperIfNeeded(context);
+            EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2); // Element 0 of Rn; the size argument follows the opcode (op.Size + 2).
 
             context.EmitStintzr(op.Rd);
         }
 
         public static void Fmov_Ftoi1(ILEmitterCtx context)
         {
-            OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;
+            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; // Only Rn/Rd are read; the extract below always uses index 1, size 3.
 
             EmitVectorExtractZx(context, op.Rn, 1, 3);
 
-            EmitIntZeroUpperIfNeeded(context);
-
             context.EmitStintzr(op.Rd);
         }
 
         public static void Fmov_Itof(ILEmitterCtx context)
         {
-            OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;
+            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; // Decoded as OpCodeSimd64 so that op.Size is available below.
 
             context.EmitLdintzr(op.Rn);
 
-            EmitIntZeroUpperIfNeeded(context);
-
-            EmitScalarSet(context, op.Rd, 3);
+            EmitScalarSet(context, op.Rd, op.Size + 2); // Scalar write to Rd; the size argument follows the opcode (op.Size + 2).
         }
 
         public static void Fmov_Itof1(ILEmitterCtx context)
         {
-            OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;
+            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; // Only Rn/Rd are read; the insert below always targets index 1, size 3.
 
             context.EmitLdintzr(op.Rn);
 
-            EmitIntZeroUpperIfNeeded(context);
-
             EmitVectorInsert(context, op.Rd, 1, 3);
         }
 
@@ -280,7 +272,7 @@ namespace ChocolArm64.Instructions
             EmitScalarSet(context, op.Rd, op.Size + 2);
         }
 
-        public static void Fmov_V(ILEmitterCtx context)
+        public static void Fmov_Vi(ILEmitterCtx context)
         {
             OpCodeSimdImm64 op = (OpCodeSimdImm64)context.CurrOp;
 
@@ -347,7 +339,11 @@ namespace ChocolArm64.Instructions
 
             EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);
 
-            EmitIntZeroUpperIfNeeded(context);
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                context.Emit(OpCodes.Conv_U4);
+                context.Emit(OpCodes.Conv_U8);
+            }
 
             context.EmitStintzr(op.Rd);
         }
@@ -544,16 +540,6 @@ namespace ChocolArm64.Instructions
             EmitVectorZip(context, part: 1);
         }
 
-        private static void EmitIntZeroUpperIfNeeded(ILEmitterCtx context)
-        {
-            if (context.CurrOp.RegisterSize == RegisterSize.Int32 ||
-                context.CurrOp.RegisterSize == RegisterSize.Simd64)
-            {
-                context.Emit(OpCodes.Conv_U4);
-                context.Emit(OpCodes.Conv_U8);
-            }
-        }
-
         private static void EmitMoviMvni(ILEmitterCtx context, bool not)
         {
             OpCodeSimdImm64 op = (OpCodeSimdImm64)context.CurrOp;
diff --git a/ChocolArm64/OpCodeTable.cs b/ChocolArm64/OpCodeTable.cs
index 2200faefd1..74cbdab0bc 100644
--- a/ChocolArm64/OpCodeTable.cs
+++ b/ChocolArm64/OpCodeTable.cs
@@ -233,7 +233,8 @@ namespace ChocolArm64
             SetA64("0100111000101000011010xxxxxxxxxx", InstEmit.Aesmc_V,         typeof(OpCodeSimd64));
             SetA64("0x001110001xxxxx000111xxxxxxxxxx", InstEmit.And_V,           typeof(OpCodeSimdReg64));
             SetA64("0x001110011xxxxx000111xxxxxxxxxx", InstEmit.Bic_V,           typeof(OpCodeSimdReg64));
-            SetA64("0x10111100000xxx<<x101xxxxxxxxxx", InstEmit.Bic_Vi,          typeof(OpCodeSimdImm64));
+            SetA64("0x10111100000xxx0xx101xxxxxxxxxx", InstEmit.Bic_Vi,          typeof(OpCodeSimdImm64));
+            SetA64("0x10111100000xxx10x101xxxxxxxxxx", InstEmit.Bic_Vi,          typeof(OpCodeSimdImm64));
             SetA64("0x101110111xxxxx000111xxxxxxxxxx", InstEmit.Bif_V,           typeof(OpCodeSimdReg64));
             SetA64("0x101110101xxxxx000111xxxxxxxxxx", InstEmit.Bit_V,           typeof(OpCodeSimdReg64));
             SetA64("0x101110011xxxxx000111xxxxxxxxxx", InstEmit.Bsl_V,           typeof(OpCodeSimdReg64));
@@ -341,12 +342,15 @@ namespace ChocolArm64
             SetA64("0>0011101<1xxxxx110011xxxxxxxxxx", InstEmit.Fmls_V,          typeof(OpCodeSimdReg64));
             SetA64("0>0011111<xxxxxx0101x0xxxxxxxxxx", InstEmit.Fmls_Ve,         typeof(OpCodeSimdRegElemF64));
             SetA64("000111100x100000010000xxxxxxxxxx", InstEmit.Fmov_S,          typeof(OpCodeSimd64));
-            SetA64("00011110xx1xxxxxxxx100xxxxxxxxxx", InstEmit.Fmov_Si,         typeof(OpCodeSimdFmov64));
-            SetA64("0xx0111100000xxx111101xxxxxxxxxx", InstEmit.Fmov_V,          typeof(OpCodeSimdImm64));
-            SetA64("x00111100x100110000000xxxxxxxxxx", InstEmit.Fmov_Ftoi,       typeof(OpCodeSimdCvt64));
-            SetA64("x00111100x100111000000xxxxxxxxxx", InstEmit.Fmov_Itof,       typeof(OpCodeSimdCvt64));
-            SetA64("1001111010101110000000xxxxxxxxxx", InstEmit.Fmov_Ftoi1,      typeof(OpCodeSimdCvt64));
-            SetA64("1001111010101111000000xxxxxxxxxx", InstEmit.Fmov_Itof1,      typeof(OpCodeSimdCvt64));
+            SetA64("000111100x1xxxxxxxx10000000xxxxx", InstEmit.Fmov_Si,         typeof(OpCodeSimdFmov64));
+            SetA64("0x00111100000xxx111101xxxxxxxxxx", InstEmit.Fmov_Vi,         typeof(OpCodeSimdImm64));
+            SetA64("0110111100000xxx111101xxxxxxxxxx", InstEmit.Fmov_Vi,         typeof(OpCodeSimdImm64));
+            SetA64("0001111000100110000000xxxxxxxxxx", InstEmit.Fmov_Ftoi,       typeof(OpCodeSimd64));
+            SetA64("1001111001100110000000xxxxxxxxxx", InstEmit.Fmov_Ftoi,       typeof(OpCodeSimd64));
+            SetA64("0001111000100111000000xxxxxxxxxx", InstEmit.Fmov_Itof,       typeof(OpCodeSimd64));
+            SetA64("1001111001100111000000xxxxxxxxxx", InstEmit.Fmov_Itof,       typeof(OpCodeSimd64));
+            SetA64("1001111010101110000000xxxxxxxxxx", InstEmit.Fmov_Ftoi1,      typeof(OpCodeSimd64));
+            SetA64("1001111010101111000000xxxxxxxxxx", InstEmit.Fmov_Itof1,      typeof(OpCodeSimd64));
             SetA64("000111110x0xxxxx1xxxxxxxxxxxxxxx", InstEmit.Fmsub_S,         typeof(OpCodeSimdReg64));
             SetA64("000111100x1xxxxx000010xxxxxxxxxx", InstEmit.Fmul_S,          typeof(OpCodeSimdReg64));
             SetA64("010111111xxxxxxx1001x0xxxxxxxxxx", InstEmit.Fmul_Se,         typeof(OpCodeSimdRegElemF64));
@@ -419,7 +423,8 @@ namespace ChocolArm64
             SetA64("0x10111000100000010110xxxxxxxxxx", InstEmit.Not_V,           typeof(OpCodeSimd64));
             SetA64("0x001110111xxxxx000111xxxxxxxxxx", InstEmit.Orn_V,           typeof(OpCodeSimdReg64));
             SetA64("0x001110101xxxxx000111xxxxxxxxxx", InstEmit.Orr_V,           typeof(OpCodeSimdReg64));
-            SetA64("0x00111100000xxx<<x101xxxxxxxxxx", InstEmit.Orr_Vi,          typeof(OpCodeSimdImm64));
+            SetA64("0x00111100000xxx0xx101xxxxxxxxxx", InstEmit.Orr_Vi,          typeof(OpCodeSimdImm64));
+            SetA64("0x00111100000xxx10x101xxxxxxxxxx", InstEmit.Orr_Vi,          typeof(OpCodeSimdImm64));
             SetA64("0x101110<<1xxxxx010000xxxxxxxxxx", InstEmit.Raddhn_V,        typeof(OpCodeSimdReg64));
             SetA64("0x10111001100000010110xxxxxxxxxx", InstEmit.Rbit_V,          typeof(OpCodeSimd64));
             SetA64("0x00111000100000000110xxxxxxxxxx", InstEmit.Rev16_V,         typeof(OpCodeSimd64));
@@ -458,7 +463,8 @@ namespace ChocolArm64
             SetA64("0x101110<<100001001110xxxxxxxxxx", InstEmit.Shll_V,          typeof(OpCodeSimd64));
             SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstEmit.Shrn_V,          typeof(OpCodeSimdShImm64));
             SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstEmit.Shsub_V,         typeof(OpCodeSimdReg64));
-            SetA64("0x1011110>>>>xxx010101xxxxxxxxxx", InstEmit.Sli_V,           typeof(OpCodeSimdShImm64));
+            SetA64("0x10111100>>>xxx010101xxxxxxxxxx", InstEmit.Sli_V,           typeof(OpCodeSimdShImm64));
+            SetA64("0110111101xxxxxx010101xxxxxxxxxx", InstEmit.Sli_V,           typeof(OpCodeSimdShImm64));
             SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstEmit.Smax_V,          typeof(OpCodeSimdReg64));
             SetA64("0x001110<<1xxxxx101001xxxxxxxxxx", InstEmit.Smaxp_V,         typeof(OpCodeSimdReg64));
             SetA64("000011100x110000101010xxxxxxxxxx", InstEmit.Smaxv_V,         typeof(OpCodeSimd64));
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
index c08cacacfc..a62845feaa 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@@ -618,6 +618,18 @@ namespace Ryujinx.Tests.Cpu
                 yield return rnd4;
             }
         }
+
+        private static uint[] _W_() // 32-bit boundary bit patterns, used as a ValueSource.
+        {
+            return new uint[] { 0x00000000u, 0x7FFFFFFFu, // all zeros; every bit set except the top one
+                                0x80000000u, 0xFFFFFFFFu }; // only the top bit set; all ones
+        }
+
+        private static ulong[] _X_() // 64-bit boundary bit patterns, used as a ValueSource.
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, // all zeros; every bit set except the top one
+                                 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; // only the top bit set; all ones
+        }
 #endregion
 
 #region "ValueSource (Opcodes)"
@@ -877,6 +889,70 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
+        private static uint[] _F_Mov_Ftoi_SW_() // Base encoding for F_Mov_Ftoi_SW, which ORs Rn/Rd into it.
+        {
+            return new uint[]
+            {
+                0x1E260000u // FMOV W0, S0
+            };
+        }
+
+        private static uint[] _F_Mov_Ftoi_DX_() // Base encoding for F_Mov_Ftoi_DX, which ORs Rn/Rd into it.
+        {
+            return new uint[]
+            {
+                0x9E660000u // FMOV X0, D0
+            };
+        }
+
+        private static uint[] _F_Mov_Ftoi1_DX_() // Base encoding for F_Mov_Ftoi1_DX, which ORs Rn/Rd into it.
+        {
+            return new uint[]
+            {
+                0x9EAE0000u // FMOV X0, V0.D[1]
+            };
+        }
+
+        private static uint[] _F_Mov_Itof_WS_() // Base encoding for F_Mov_Itof_WS, which ORs Rn/Rd into it.
+        {
+            return new uint[]
+            {
+                0x1E270000u // FMOV S0, W0
+            };
+        }
+
+        private static uint[] _F_Mov_Itof_XD_() // Base encoding for F_Mov_Itof_XD, which ORs Rn/Rd into it.
+        {
+            return new uint[]
+            {
+                0x9E670000u // FMOV D0, X0
+            };
+        }
+
+        private static uint[] _F_Mov_Itof1_XD_() // Base encoding for F_Mov_Itof1_XD, which ORs Rn/Rd into it.
+        {
+            return new uint[]
+            {
+                0x9EAF0000u // FMOV V0.D[1], X0
+            };
+        }
+
+        private static uint[] _F_Mov_S_S_() // Complete encoding; F_Mov_S_S executes it without modification.
+        {
+            return new uint[]
+            {
+                0x1E204020u // FMOV S0, S1
+            };
+        }
+
+        private static uint[] _F_Mov_S_D_() // Complete encoding; F_Mov_S_D executes it without modification.
+        {
+            return new uint[]
+            {
+                0x1E604020u // FMOV D0, D1
+            };
+        }
+
         private static uint[] _F_Recpe_Rsqrte_S_S_()
         {
             return new uint[]
@@ -997,6 +1073,24 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
+        private static uint[] _SU_Addl_V_V_8BH_4HS_() // Bases with size = 0; the matching test ORs in Rn, Rd and the size field.
+        {
+            return new uint[]
+            {
+                0x0E303800u, // SADDLV H0, V0.8B
+                0x2E303800u  // UADDLV H0, V0.8B
+            };
+        }
+
+        private static uint[] _SU_Addl_V_V_16BH_8HS_4SD_() // Bases with size = 0; the matching test ORs in Rn, Rd and the size field.
+        {
+            return new uint[]
+            {
+                0x4E303800u, // SADDLV H0, V0.16B
+                0x6E303800u  // UADDLV H0, V0.16B
+            };
+        }
+
         private static uint[] _SU_Cvt_F_S_S_()
         {
             return new uint[]
@@ -2024,6 +2118,132 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_Ftoi_SW([ValueSource("_F_Mov_Ftoi_SW_")] uint opcodes,
+                                  [Values(0u, 31u)] uint rd,
+                                  [Values(1u)]      uint rn,
+                                  [ValueSource("_1S_F_")] ulong a)
+        {
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // Rn -> bits 5-9, Rd -> bits 0-4.
+
+            ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; // Random bits in the upper word only; the lower word starts at zero.
+            uint w31 = TestContext.CurrentContext.Random.NextUInt();
+            Vector128<float> v1 = MakeVectorE0(a); // Source operand; rn is fixed to 1, so V1 is the register read.
+
+            SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_Ftoi_DX([ValueSource("_F_Mov_Ftoi_DX_")] uint opcodes,
+                                  [Values(0u, 31u)] uint rd,
+                                  [Values(1u)]      uint rn,
+                                  [ValueSource("_1D_F_")] ulong a)
+        {
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // Rn -> bits 5-9, Rd -> bits 0-4.
+
+            ulong x31 = TestContext.CurrentContext.Random.NextULong(); // Random initial value for register 31 (one of the rd choices).
+            Vector128<float> v1 = MakeVectorE0(a); // Source operand; rn is fixed to 1, so V1 is the register read.
+
+            SingleOpcode(opcodes, x31: x31, v1: v1);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_Ftoi1_DX([ValueSource("_F_Mov_Ftoi1_DX_")] uint opcodes,
+                                   [Values(0u, 31u)] uint rd,
+                                   [Values(1u)]      uint rn,
+                                   [ValueSource("_1D_F_")] ulong a)
+        {
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // Rn -> bits 5-9, Rd -> bits 0-4.
+
+            ulong x31 = TestContext.CurrentContext.Random.NextULong(); // Random initial value for register 31 (one of the rd choices).
+            Vector128<float> v1 = MakeVectorE1(a); // Built with the E1 helper because the opcode reads V.D[1].
+
+            SingleOpcode(opcodes, x31: x31, v1: v1);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_Itof_WS([ValueSource("_F_Mov_Itof_WS_")] uint opcodes,
+                                  [Values(0u)]      uint rd,
+                                  [Values(1u, 31u)] uint rn,
+                                  [ValueSource("_W_")] [Random(RndCnt)] uint wn)
+        {
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // Rn -> bits 5-9, Rd -> bits 0-4.
+
+            uint  w31 = TestContext.CurrentContext.Random.NextUInt();
+            ulong z   = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // Destination V0 starts out holding z in both halves.
+
+            SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_Itof_XD([ValueSource("_F_Mov_Itof_XD_")] uint opcodes,
+                                  [Values(0u)]      uint rd,
+                                  [Values(1u, 31u)] uint rn,
+                                  [ValueSource("_X_")] [Random(RndCnt)] ulong xn)
+        {
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // Rn -> bits 5-9, Rd -> bits 0-4.
+
+            ulong x31 = TestContext.CurrentContext.Random.NextULong();
+            ulong z   = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
+            Vector128<float> v0 = MakeVectorE1(z); // Only the E1 helper is used: the filler goes where the D0 write itself does not land.
+
+            SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_Itof1_XD([ValueSource("_F_Mov_Itof1_XD_")] uint opcodes,
+                                   [Values(0u)]      uint rd,
+                                   [Values(1u, 31u)] uint rn,
+                                   [ValueSource("_X_")] [Random(RndCnt)] ulong xn)
+        {
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // Rn -> bits 5-9, Rd -> bits 0-4.
+
+            ulong x31 = TestContext.CurrentContext.Random.NextULong();
+            ulong z   = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
+            Vector128<float> v0 = MakeVectorE0(z); // Filler goes through the E0 helper; the opcode itself writes V0.D[1].
+
+            SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_S_S([ValueSource("_F_Mov_S_S_")] uint opcodes,
+                              [ValueSource("_1S_F_")] ulong a)
+        {
+            ulong z = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // Destination V0 starts out holding z in both halves.
+            Vector128<float> v1 = MakeVectorE0(a); // Source operand (the opcode is FMOV S0, S1).
+
+            SingleOpcode(opcodes, v0: v0, v1: v1); // The opcode is used as-is; no register fields are patched in.
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_S_D([ValueSource("_F_Mov_S_D_")] uint opcodes,
+                              [ValueSource("_1D_F_")] ulong a)
+        {
+            ulong z = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
+            Vector128<float> v0 = MakeVectorE1(z); // Filler goes through the E1 helper only.
+            Vector128<float> v1 = MakeVectorE0(a); // Source operand (the opcode is FMOV D0, D1).
+
+            SingleOpcode(opcodes, v0: v0, v1: v1); // The opcode is used as-is; no register fields are patched in.
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
         [Test, Pairwise] [Explicit]
         public void F_Recpe_Rsqrte_S_S([ValueSource("_F_Recpe_Rsqrte_S_S_")] uint opcodes,
                                        [ValueSource("_1S_F_")] ulong a,
@@ -2559,6 +2779,44 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise] // Not marked [Explicit], unlike the FMOV tests in this file.
+        public void SU_Addl_V_V_8BH_4HS([ValueSource("_SU_Addl_V_V_8BH_4HS_")] uint opcodes,
+                                        [Values(0u)]     uint rd,
+                                        [Values(1u, 0u)] uint rn,
+                                        [ValueSource("_8B4H_")] [Random(RndCnt)] ulong z,
+                                        [ValueSource("_8B4H_")] [Random(RndCnt)] ulong a,
+                                        [Values(0b00u, 0b01u)] uint size) // <8BH, 4HS>
+        {
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // Rn -> bits 5-9, Rd -> bits 0-4.
+            opcodes |= ((size & 3) << 22); // size -> bits 22-23.
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // V0 is the destination, and also the source when rn == 0.
+            Vector128<float> v1 = MakeVectorE0(a); // Source operand when rn == 1.
+
+            SingleOpcode(opcodes, v0: v0, v1: v1);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] // Not marked [Explicit], unlike the FMOV tests in this file.
+        public void SU_Addl_V_V_16BH_8HS_4SD([ValueSource("_SU_Addl_V_V_16BH_8HS_4SD_")] uint opcodes,
+                                             [Values(0u)]     uint rd,
+                                             [Values(1u, 0u)] uint rn,
+                                             [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
+                                             [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
+                                             [Values(0b00u, 0b01u, 0b10u)] uint size) // <16BH, 8HS, 4SD>
+        {
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // Rn -> bits 5-9, Rd -> bits 0-4.
+            opcodes |= ((size & 3) << 22); // size -> bits 22-23.
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // V0 is the destination, and also the source when rn == 0.
+            Vector128<float> v1 = MakeVectorE0E1(a, a); // Both halves carry a: the base opcodes take a 16B source.
+
+            SingleOpcode(opcodes, v0: v0, v1: v1);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
         [Test, Pairwise] [Explicit]
         public void SU_Cvt_F_S_S([ValueSource("_SU_Cvt_F_S_S_")] uint opcodes,
                                  [ValueSource("_1S_")] [Random(RndCnt)] ulong a)
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs b/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs
index 48efc18fd2..4ccd43dbb9 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs
@@ -115,6 +115,22 @@ namespace Ryujinx.Tests.Cpu
                 0x1E620430u  // FCCMPE D1, D2, #0, EQ
             };
         }
+
+        private static uint[] _F_Csel_S_S_() // Base with cond = 0b0000 (EQ); F_Csel_S_S ORs the condition into bits 12-15.
+        {
+            return new uint[]
+            {
+                0x1E220C20u // FCSEL S0, S1, S2, EQ
+            };
+        }
+
+        private static uint[] _F_Csel_S_D_() // Base with cond = 0b0000 (EQ); F_Csel_S_D ORs the condition into bits 12-15.
+        {
+            return new uint[]
+            {
+                0x1E620C20u // FCSEL D0, D1, D2, EQ
+            };
+        }
 #endregion
 
         private const int RndCnt     = 2;
@@ -173,6 +189,48 @@ namespace Ryujinx.Tests.Cpu
 
             CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc);
         }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Csel_S_S([ValueSource("_F_Csel_S_S_")] uint opcodes,
+                               [ValueSource("_1S_F_")] ulong a,
+                               [ValueSource("_1S_F_")] ulong b,
+                               [Values(0b0000u, 0b0001u, 0b0010u, 0b0011u,             // <EQ, NE, CS/HS, CC/LO,
+                                       0b0100u, 0b0101u, 0b0110u, 0b0111u,             //  MI, PL, VS, VC,
+                                       0b1000u, 0b1001u, 0b1010u, 0b1011u,             //  HI, LS, GE, LT,
+                                       0b1100u, 0b1101u, 0b1110u, 0b1111u)] uint cond) //  GT, LE, AL, NV>
+        {
+            opcodes |= ((cond & 15) << 12); // cond -> bits 12-15 (the base opcode has them clear).
+
+            ulong z = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // Destination V0 starts out holding z in both halves.
+            Vector128<float> v1 = MakeVectorE0(a); // First source (S1 in the base opcode).
+            Vector128<float> v2 = MakeVectorE0(b); // Second source (S2 in the base opcode).
+
+            SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); // No flag arguments are passed, so flags keep SingleOpcode's defaults.
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Csel_S_D([ValueSource("_F_Csel_S_D_")] uint opcodes,
+                               [ValueSource("_1D_F_")] ulong a,
+                               [ValueSource("_1D_F_")] ulong b,
+                               [Values(0b0000u, 0b0001u, 0b0010u, 0b0011u,             // <EQ, NE, CS/HS, CC/LO,
+                                       0b0100u, 0b0101u, 0b0110u, 0b0111u,             //  MI, PL, VS, VC,
+                                       0b1000u, 0b1001u, 0b1010u, 0b1011u,             //  HI, LS, GE, LT,
+                                       0b1100u, 0b1101u, 0b1110u, 0b1111u)] uint cond) //  GT, LE, AL, NV>
+        {
+            opcodes |= ((cond & 15) << 12); // cond -> bits 12-15 (the base opcode has them clear).
+
+            ulong z = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
+            Vector128<float> v0 = MakeVectorE1(z); // Filler goes through the E1 helper only.
+            Vector128<float> v1 = MakeVectorE0(a); // First source (D1 in the base opcode).
+            Vector128<float> v2 = MakeVectorE0(b); // Second source (D2 in the base opcode).
+
+            SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); // No flag arguments are passed, so flags keep SingleOpcode's defaults.
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
 #endif
     }
 }
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs b/Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs
new file mode 100644
index 0000000000..a7e0e0f968
--- /dev/null
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs
@@ -0,0 +1,61 @@
+#define SimdFmov
+
+using NUnit.Framework;
+
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Tests.Cpu
+{
+    [Category("SimdFmov")]
+    public sealed class CpuTestSimdFmov : CpuTest // Tests for the scalar FMOV (immediate) encodings.
+    {
+#if SimdFmov
+
+#region "ValueSource"
+        private static uint[] _F_Mov_Si_S_() // Base with imm8 = 0; F_Mov_Si_S ORs imm8 into bits 13-20.
+        {
+            return new uint[]
+            {
+                0x1E201000u // FMOV S0, #2.0
+            };
+        }
+
+        private static uint[] _F_Mov_Si_D_() // Base with imm8 = 0; F_Mov_Si_D ORs imm8 into bits 13-20.
+        {
+            return new uint[]
+            {
+                0x1E601000u // FMOV D0, #2.0
+            };
+        }
+#endregion
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_Si_S([ValueSource("_F_Mov_Si_S_")] uint opcodes,
+                               [Range(0u, 255u, 1u)] uint imm8) // Covers every 8-bit immediate.
+        {
+            opcodes |= ((imm8 & 0xFFu) << 13); // imm8 -> bits 13-20.
+
+            ulong z = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // Destination V0 starts out holding z in both halves.
+
+            SingleOpcode(opcodes, v0: v0);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_Si_D([ValueSource("_F_Mov_Si_D_")] uint opcodes,
+                               [Range(0u, 255u, 1u)] uint imm8) // Covers every 8-bit immediate.
+        {
+            opcodes |= ((imm8 & 0xFFu) << 13); // imm8 -> bits 13-20.
+
+            ulong z = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
+            Vector128<float> v0 = MakeVectorE1(z); // Filler goes through the E1 helper only.
+
+            SingleOpcode(opcodes, v0: v0);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+#endif
+    }
+}
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs
index bb6e117395..ce8f63bc6c 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs
@@ -47,6 +47,18 @@ namespace Ryujinx.Tests.Cpu
 #endregion
 
 #region "ValueSource (Types)"
+        private static ulong[] _2S_() // Boundary bit patterns repeated in each 32-bit half of a 64-bit value.
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFF7FFFFFFFul, // all zeros; each half with every bit set except its top one
+                                 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; // each half with only its top bit set; all ones
+        }
+
+        private static ulong[] _4H_() // Boundary bit patterns repeated in each 16-bit quarter of a 64-bit value.
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, // all zeros; each quarter with every bit set except its top one
+                                 0x8000800080008000ul, 0xFFFFFFFFFFFFFFFFul }; // each quarter with only its top bit set; all ones
+        }
+
         private static IEnumerable<byte> _8BIT_IMM_()
         {
             yield return 0x00;
@@ -79,6 +91,48 @@ namespace Ryujinx.Tests.Cpu
 #endregion
 
 #region "ValueSource (Opcodes)"
+        private static uint[] _Bic_Orr_Vi_16bit_() // Bases with imm8 = 0; Bic_Orr_Vi_16bit ORs in imm8, amount and q.
+        {
+            return new uint[]
+            {
+                0x2F009400u, // BIC V0.4H, #0
+                0x0F009400u  // ORR V0.4H, #0
+            };
+        }
+
+        private static uint[] _Bic_Orr_Vi_32bit_() // Bases with imm8 = 0; Bic_Orr_Vi_32bit ORs in imm8, amount and q.
+        {
+            return new uint[]
+            {
+                0x2F001400u, // BIC V0.2S, #0
+                0x0F001400u  // ORR V0.2S, #0
+            };
+        }
+
+        private static uint[] _F_Mov_Vi_2S_() // Base with all immediate bits clear; F_Mov_Vi_2S ORs them in.
+        {
+            return new uint[]
+            {
+                0x0F00F400u // FMOV V0.2S, #2.0
+            };
+        }
+
+        private static uint[] _F_Mov_Vi_4S_() // Base with all immediate bits clear; F_Mov_Vi_4S ORs them in.
+        {
+            return new uint[]
+            {
+                0x4F00F400u // FMOV V0.4S, #2.0
+            };
+        }
+
+        private static uint[] _F_Mov_Vi_2D_() // Base with all immediate bits clear; F_Mov_Vi_2D ORs them in.
+        {
+            return new uint[]
+            {
+                0x6F00F400u // FMOV V0.2D, #2.0
+            };
+        }
+
         private static uint[] _Movi_V_8bit_()
         {
             return new uint[]
@@ -131,19 +185,105 @@ namespace Ryujinx.Tests.Cpu
         }
 #endregion
 
+        private const int RndCnt      = 2;
         private const int RndCntImm8  = 2;
         private const int RndCntImm64 = 2;
 
+        [Test, Pairwise] // Not marked [Explicit], unlike the FMOV tests in this file.
+        public void Bic_Orr_Vi_16bit([ValueSource("_Bic_Orr_Vi_16bit_")] uint opcodes,
+                                     [ValueSource("_4H_")] [Random(RndCnt)] ulong z,
+                                     [ValueSource("_8BIT_IMM_")] byte imm8,
+                                     [Values(0b0u, 0b1u)] uint amount, // <0, 8>
+                                     [Values(0b0u, 0b1u)] uint q)      // <4H, 8H>
+        {
+            uint abc   = (imm8 & 0xE0u) >> 5; // Top three bits of imm8.
+            uint defgh = (imm8 & 0x1Fu); // Low five bits of imm8.
+
+            opcodes |= (abc << 16) | (defgh << 5); // abc -> bits 16-18, defgh -> bits 5-9.
+            opcodes |= ((amount & 1) << 13); // Shift selector -> bit 13.
+            opcodes |= ((q & 1) << 30); // Q -> bit 30.
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // V0 is read and written by BIC/ORR, so it is seeded with z in both halves.
+
+            SingleOpcode(opcodes, v0: v0);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] // Not marked [Explicit], unlike the FMOV tests in this file.
+        public void Bic_Orr_Vi_32bit([ValueSource("_Bic_Orr_Vi_32bit_")] uint opcodes,
+                                     [ValueSource("_2S_")] [Random(RndCnt)] ulong z,
+                                     [ValueSource("_8BIT_IMM_")] byte imm8,
+                                     [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint amount, // <0, 8, 16, 24>
+                                     [Values(0b0u, 0b1u)] uint q)                      // <2S, 4S>
+        {
+            uint abc   = (imm8 & 0xE0u) >> 5; // Top three bits of imm8.
+            uint defgh = (imm8 & 0x1Fu); // Low five bits of imm8.
+
+            opcodes |= (abc << 16) | (defgh << 5); // abc -> bits 16-18, defgh -> bits 5-9.
+            opcodes |= ((amount & 3) << 13); // Shift selector -> bits 13-14.
+            opcodes |= ((q & 1) << 30); // Q -> bit 30.
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // V0 is read and written by BIC/ORR, so it is seeded with z in both halves.
+
+            SingleOpcode(opcodes, v0: v0);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_Vi_2S([ValueSource("_F_Mov_Vi_2S_")] uint opcodes,
+                                [Range(0u, 255u, 1u)] uint abcdefgh) // Covers every 8-bit immediate.
+        {
+            uint abc   = (abcdefgh & 0xE0u) >> 5; // Top three bits of the immediate.
+            uint defgh = (abcdefgh & 0x1Fu); // Low five bits of the immediate.
+
+            opcodes |= (abc << 16) | (defgh << 5); // abc -> bits 16-18, defgh -> bits 5-9.
+
+            ulong z = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
+            Vector128<float> v0 = MakeVectorE1(z); // Filler goes through the E1 helper only.
+
+            SingleOpcode(opcodes, v0: v0);
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_Vi_4S([ValueSource("_F_Mov_Vi_4S_")] uint opcodes,
+                                [Range(0u, 255u, 1u)] uint abcdefgh) // Covers every 8-bit immediate.
+        {
+            uint abc   = (abcdefgh & 0xE0u) >> 5; // Top three bits of the immediate.
+            uint defgh = (abcdefgh & 0x1Fu); // Low five bits of the immediate.
+
+            opcodes |= (abc << 16) | (defgh << 5); // abc -> bits 16-18, defgh -> bits 5-9.
+
+            SingleOpcode(opcodes); // No initial V0 is supplied for this form.
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
+        [Test, Pairwise] [Explicit] // [Explicit]: NUnit skips this test unless it is selected explicitly.
+        public void F_Mov_Vi_2D([ValueSource("_F_Mov_Vi_2D_")] uint opcodes,
+                                [Range(0u, 255u, 1u)] uint abcdefgh) // Covers every 8-bit immediate.
+        {
+            uint abc   = (abcdefgh & 0xE0u) >> 5; // Top three bits of the immediate.
+            uint defgh = (abcdefgh & 0x1Fu); // Low five bits of the immediate.
+
+            opcodes |= (abc << 16) | (defgh << 5); // abc -> bits 16-18, defgh -> bits 5-9.
+
+            SingleOpcode(opcodes); // No initial V0 is supplied for this form.
+
+            CompareAgainstUnicorn(); // NOTE(review): presumably checks the result against Unicorn; helper is defined elsewhere.
+        }
+
         [Test, Pairwise]
         public void Movi_V_8bit([ValueSource("_Movi_V_8bit_")] uint opcodes,
-                                [Values(0u)] uint rd,
                                 [ValueSource("_8BIT_IMM_")] byte imm8,
                                 [Values(0b0u, 0b1u)] uint q) // <8B, 16B>
         {
             uint abc   = (imm8 & 0xE0u) >> 5;
             uint defgh = (imm8 & 0x1Fu);
 
-            opcodes |= ((rd & 31) << 0);
             opcodes |= (abc << 16) | (defgh << 5);
             opcodes |= ((q & 1) << 30);
 
@@ -157,7 +297,6 @@ namespace Ryujinx.Tests.Cpu
 
         [Test, Pairwise]
         public void Movi_Mvni_V_16bit_shifted_imm([ValueSource("_Movi_Mvni_V_16bit_shifted_imm_")] uint opcodes,
-                                                  [Values(0u)] uint rd,
                                                   [ValueSource("_8BIT_IMM_")] byte imm8,
                                                   [Values(0b0u, 0b1u)] uint amount, // <0, 8>
                                                   [Values(0b0u, 0b1u)] uint q)      // <4H, 8H>
@@ -165,7 +304,6 @@ namespace Ryujinx.Tests.Cpu
             uint abc   = (imm8 & 0xE0u) >> 5;
             uint defgh = (imm8 & 0x1Fu);
 
-            opcodes |= ((rd & 31) << 0);
             opcodes |= (abc << 16) | (defgh << 5);
             opcodes |= ((amount & 1) << 13);
             opcodes |= ((q & 1) << 30);
@@ -180,7 +318,6 @@ namespace Ryujinx.Tests.Cpu
 
         [Test, Pairwise]
         public void Movi_Mvni_V_32bit_shifted_imm([ValueSource("_Movi_Mvni_V_32bit_shifted_imm_")] uint opcodes,
-                                                  [Values(0u)] uint rd,
                                                   [ValueSource("_8BIT_IMM_")] byte imm8,
                                                   [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint amount, // <0, 8, 16, 24>
                                                   [Values(0b0u, 0b1u)] uint q)                      // <2S, 4S>
@@ -188,7 +325,6 @@ namespace Ryujinx.Tests.Cpu
             uint abc   = (imm8 & 0xE0u) >> 5;
             uint defgh = (imm8 & 0x1Fu);
 
-            opcodes |= ((rd & 31) << 0);
             opcodes |= (abc << 16) | (defgh << 5);
             opcodes |= ((amount & 3) << 13);
             opcodes |= ((q & 1) << 30);
@@ -203,7 +339,6 @@ namespace Ryujinx.Tests.Cpu
 
         [Test, Pairwise]
         public void Movi_Mvni_V_32bit_shifting_ones([ValueSource("_Movi_Mvni_V_32bit_shifting_ones_")] uint opcodes,
-                                                    [Values(0u)] uint rd,
                                                     [ValueSource("_8BIT_IMM_")] byte imm8,
                                                     [Values(0b0u, 0b1u)] uint amount, // <8, 16>
                                                     [Values(0b0u, 0b1u)] uint q)      // <2S, 4S>
@@ -211,7 +346,6 @@ namespace Ryujinx.Tests.Cpu
             uint abc   = (imm8 & 0xE0u) >> 5;
             uint defgh = (imm8 & 0x1Fu);
 
-            opcodes |= ((rd & 31) << 0);
             opcodes |= (abc << 16) | (defgh << 5);
             opcodes |= ((amount & 1) << 12);
             opcodes |= ((q & 1) << 30);
@@ -226,7 +360,6 @@ namespace Ryujinx.Tests.Cpu
 
         [Test, Pairwise]
         public void Movi_V_64bit_scalar([ValueSource("_Movi_V_64bit_scalar_")] uint opcodes,
-                                        [Values(0u)] uint rd,
                                         [ValueSource("_64BIT_IMM_")] ulong imm)
         {
             byte imm8 = ShrinkImm64(imm);
@@ -234,7 +367,6 @@ namespace Ryujinx.Tests.Cpu
             uint abc   = (imm8 & 0xE0u) >> 5;
             uint defgh = (imm8 & 0x1Fu);
 
-            opcodes |= ((rd & 31) << 0);
             opcodes |= (abc << 16) | (defgh << 5);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
@@ -247,7 +379,6 @@ namespace Ryujinx.Tests.Cpu
 
         [Test, Pairwise]
         public void Movi_V_64bit_vector([ValueSource("_Movi_V_64bit_vector_")] uint opcodes,
-                                        [Values(0u)] uint rd,
                                         [ValueSource("_64BIT_IMM_")] ulong imm)
         {
             byte imm8 = ShrinkImm64(imm);
@@ -255,7 +386,6 @@ namespace Ryujinx.Tests.Cpu
             uint abc   = (imm8 & 0xE0u) >> 5;
             uint defgh = (imm8 & 0x1Fu);
 
-            opcodes |= ((rd & 31) << 0);
             opcodes |= (abc << 16) | (defgh << 5);
 
             SingleOpcode(opcodes);
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs
index fe93f06e37..15581d69b8 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs
@@ -70,7 +70,7 @@ namespace Ryujinx.Tests.Cpu
         private const int RndCnt      = 2;
         private const int RndCntIndex = 2;
 
-        [Test, Pairwise, Description("DUP <Vd>.<T>, <R><n>")]
+        [Test, Pairwise, Description("DUP <Vd>.<T>, W<n>")]
         public void Dup_Gp_W([Values(0u)]      uint rd,
                              [Values(1u, 31u)] uint rn,
                              [ValueSource("_W_")] [Random(RndCnt)] uint wn,
@@ -92,7 +92,7 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("DUP <Vd>.<T>, <R><n>")]
+        [Test, Pairwise, Description("DUP <Vd>.<T>, X<n>")]
         public void Dup_Gp_X([Values(0u)]      uint rd,
                              [Values(1u, 31u)] uint rn,
                              [ValueSource("_X_")] [Random(RndCnt)] ulong xn)
@@ -150,7 +150,7 @@ namespace Ryujinx.Tests.Cpu
 
         [Test, Pairwise, Description("DUP S0, V1.S[<index>]")]
         public void Dup_S_S([ValueSource("_2S_")] [Random(RndCnt)] ulong a,
-                            [Range(0u, 3u)] uint index)
+                            [Values(0u, 1u, 2u, 3u)] uint index)
         {
             const int size = 2;
 
@@ -170,7 +170,7 @@ namespace Ryujinx.Tests.Cpu
 
         [Test, Pairwise, Description("DUP D0, V1.D[<index>]")]
         public void Dup_S_D([ValueSource("_1D_")] [Random(RndCnt)] ulong a,
-                            [Range(0u, 1u)] uint index)
+                            [Values(0u, 1u)] uint index)
         {
             const int size = 3;
 
@@ -243,7 +243,7 @@ namespace Ryujinx.Tests.Cpu
                                 [Values(1u, 0u)] uint rn,
                                 [ValueSource("_2S_")] [Random(RndCnt)] ulong z,
                                 [ValueSource("_2S_")] [Random(RndCnt)] ulong a,
-                                [Range(0u, 3u)] uint index,
+                                [Values(0u, 1u, 2u, 3u)] uint index,
                                 [Values(0b0u, 0b1u)] uint q) // <2S, 4S>
         {
             const int size = 2;
@@ -268,7 +268,7 @@ namespace Ryujinx.Tests.Cpu
                              [Values(1u, 0u)] uint rn,
                              [ValueSource("_1D_")] [Random(RndCnt)] ulong z,
                              [ValueSource("_1D_")] [Random(RndCnt)] ulong a,
-                             [Range(0u, 1u)] uint index,
+                             [Values(0u, 1u)] uint index,
                              [Values(0b1u)] uint q) // <2D>
         {
             const int size = 3;
@@ -288,13 +288,206 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("SMOV <Wd>, <Vn>.<Ts>[<index>]")]
-        public void Smov_S_W([Values(0u, 31u)] uint rd,
-                             [Values(1u)]      uint rn,
-                             [ValueSource("_8B4H_")] [Random(RndCnt)] ulong a,
-                             [Values(0, 1)] int size, // <B, H>
-                             [Values(0u, 1u, 2u, 3u)] uint index)
+        [Test, Pairwise, Description("INS <Vd>.B[<index>], W<n>")] // Builds one INS opcode per parameter combination, runs it via SingleOpcode, then calls CompareAgainstUnicorn.
+        public void Ins_Gp_WB([Values(0u)]      uint rd, // register number ORed into opcode bits 4:0
+                              [Values(1u, 31u)] uint rn, // register number ORed into opcode bits 9:5
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong z, // 64-bit pattern used to build the initial v0
+                              [ValueSource("_W_")] [Random(RndCnt)] uint wn, // value handed to SingleOpcode as x1
+                              [Values(0u, 15u)] [Random(1u, 14u, RndCntIndex)] uint index) // element index folded into imm5
         {
+            const int size = 0; // shift amount for the imm5 packing; 0 goes with the B form named in the Description
+
+            uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; // index placed above a single set bit at position size, masked to 5 bits
+
+            uint opcode = 0x4E001C00; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // the same value z is supplied for both arguments
+
+            SingleOpcode(opcode, x1: wn, v0: v0);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("INS <Vd>.H[<index>], W<n>")] // Builds one INS opcode per parameter combination, runs it via SingleOpcode, then calls CompareAgainstUnicorn.
+        public void Ins_Gp_WH([Values(0u)]      uint rd, // register number ORed into opcode bits 4:0
+                              [Values(1u, 31u)] uint rn, // register number ORed into opcode bits 9:5
+                              [ValueSource("_4H_")] [Random(RndCnt)] ulong z, // 64-bit pattern used to build the initial v0
+                              [ValueSource("_W_")] [Random(RndCnt)] uint wn, // value handed to SingleOpcode as x1
+                              [Values(0u, 7u)] [Random(1u, 6u, RndCntIndex)] uint index) // element index folded into imm5
+        {
+            const int size = 1; // shift amount for the imm5 packing; 1 goes with the H form named in the Description
+
+            uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; // index placed above a single set bit at position size, masked to 5 bits
+
+            uint opcode = 0x4E001C00; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // the same value z is supplied for both arguments
+
+            SingleOpcode(opcode, x1: wn, v0: v0);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("INS <Vd>.S[<index>], W<n>")] // Builds one INS opcode per parameter combination, runs it via SingleOpcode, then calls CompareAgainstUnicorn.
+        public void Ins_Gp_WS([Values(0u)]      uint rd, // register number ORed into opcode bits 4:0
+                              [Values(1u, 31u)] uint rn, // register number ORed into opcode bits 9:5
+                              [ValueSource("_2S_")] [Random(RndCnt)] ulong z, // 64-bit pattern used to build the initial v0
+                              [ValueSource("_W_")] [Random(RndCnt)] uint wn, // value handed to SingleOpcode as x1
+                              [Values(0u, 1u, 2u, 3u)] uint index) // element index folded into imm5; all four values are enumerated
+        {
+            const int size = 2; // shift amount for the imm5 packing; 2 goes with the S form named in the Description
+
+            uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; // index placed above a single set bit at position size, masked to 5 bits
+
+            uint opcode = 0x4E001C00; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // the same value z is supplied for both arguments
+
+            SingleOpcode(opcode, x1: wn, v0: v0);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("INS <Vd>.D[<index>], X<n>")] // Builds one INS opcode per parameter combination, runs it via SingleOpcode, then calls CompareAgainstUnicorn.
+        public void Ins_Gp_XD([Values(0u)]      uint rd, // register number ORed into opcode bits 4:0
+                              [Values(1u, 31u)] uint rn, // register number ORed into opcode bits 9:5
+                              [ValueSource("_1D_")] [Random(RndCnt)] ulong z, // 64-bit pattern used to build the initial v0
+                              [ValueSource("_X_")] [Random(RndCnt)] ulong xn, // 64-bit value handed to SingleOpcode as x1
+                              [Values(0u, 1u)] uint index) // element index folded into imm5; both values are enumerated
+        {
+            const int size = 3; // shift amount for the imm5 packing; 3 goes with the D form named in the Description
+
+            uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; // index placed above a single set bit at position size, masked to 5 bits
+
+            uint opcode = 0x4E001C00; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // the same value z is supplied for both arguments
+
+            SingleOpcode(opcode, x1: xn, v0: v0);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("INS <Vd>.B[<index1>], <Vn>.B[<index2>]")] // Builds one element-to-element INS opcode per parameter combination, runs it, then calls CompareAgainstUnicorn.
+        public void Ins_V_BB([Values(0u)]     uint rd, // register number ORed into opcode bits 4:0
+                             [Values(1u, 0u)] uint rn, // register number ORed into opcode bits 9:5; the value 0 equals rd
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong z, // 64-bit pattern used to build the initial v0
+                             [ValueSource("_8B_")] [Random(RndCnt)] ulong a, // 64-bit pattern used to build v1
+                             [Values(0u, 15u)] [Random(1u, 14u, RndCntIndex)] uint dstIndex, // index folded into imm5
+                             [Values(0u, 15u)] [Random(1u, 14u, RndCntIndex)] uint srcIndex) // index folded into imm4
+        {
+            const int size = 0; // shift amount for the imm5/imm4 packing; 0 goes with the B form named in the Description
+
+            uint imm5 = (dstIndex << (size + 1) | 1u << size) & 0x1Fu; // dstIndex placed above a single set bit at position size, masked to 5 bits
+            uint imm4 = (srcIndex << size) & 0xFu; // srcIndex shifted left by size, masked to 4 bits
+
+            uint opcode = 0x6E000400; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+            opcode |= (imm4 << 11); // imm4 lands in bits 14:11
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // the same value z is supplied for both arguments
+            Vector128<float> v1 = MakeVectorE0E1(a, a); // the same value a is supplied for both arguments
+
+            SingleOpcode(opcode, v0: v0, v1: v1);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("INS <Vd>.H[<index1>], <Vn>.H[<index2>]")] // Builds one element-to-element INS opcode per parameter combination, runs it, then calls CompareAgainstUnicorn.
+        public void Ins_V_HH([Values(0u)]     uint rd, // register number ORed into opcode bits 4:0
+                             [Values(1u, 0u)] uint rn, // register number ORed into opcode bits 9:5; the value 0 equals rd
+                             [ValueSource("_4H_")] [Random(RndCnt)] ulong z, // 64-bit pattern used to build the initial v0
+                             [ValueSource("_4H_")] [Random(RndCnt)] ulong a, // 64-bit pattern used to build v1
+                             [Values(0u, 7u)] [Random(1u, 6u, RndCntIndex)] uint dstIndex, // index folded into imm5
+                             [Values(0u, 7u)] [Random(1u, 6u, RndCntIndex)] uint srcIndex) // index folded into imm4
+        {
+            const int size = 1; // shift amount for the imm5/imm4 packing; 1 goes with the H form named in the Description
+
+            uint imm5 = (dstIndex << (size + 1) | 1u << size) & 0x1Fu; // dstIndex placed above a single set bit at position size, masked to 5 bits
+            uint imm4 = (srcIndex << size) & 0xFu; // srcIndex shifted left by size, masked to 4 bits
+
+            uint opcode = 0x6E000400; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+            opcode |= (imm4 << 11); // imm4 lands in bits 14:11
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // the same value z is supplied for both arguments
+            Vector128<float> v1 = MakeVectorE0E1(a, a); // the same value a is supplied for both arguments
+
+            SingleOpcode(opcode, v0: v0, v1: v1);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("INS <Vd>.S[<index1>], <Vn>.S[<index2>]")] // Builds one element-to-element INS opcode per parameter combination, runs it, then calls CompareAgainstUnicorn.
+        public void Ins_V_SS([Values(0u)]     uint rd, // register number ORed into opcode bits 4:0
+                             [Values(1u, 0u)] uint rn, // register number ORed into opcode bits 9:5; the value 0 equals rd
+                             [ValueSource("_2S_")] [Random(RndCnt)] ulong z, // 64-bit pattern used to build the initial v0
+                             [ValueSource("_2S_")] [Random(RndCnt)] ulong a, // 64-bit pattern used to build v1
+                             [Values(0u, 1u, 2u, 3u)] uint dstIndex, // index folded into imm5
+                             [Values(0u, 1u, 2u, 3u)] uint srcIndex) // index folded into imm4
+        {
+            const int size = 2; // shift amount for the imm5/imm4 packing; 2 goes with the S form named in the Description
+
+            uint imm5 = (dstIndex << (size + 1) | 1u << size) & 0x1Fu; // dstIndex placed above a single set bit at position size, masked to 5 bits
+            uint imm4 = (srcIndex << size) & 0xFu; // srcIndex shifted left by size, masked to 4 bits
+
+            uint opcode = 0x6E000400; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+            opcode |= (imm4 << 11); // imm4 lands in bits 14:11
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // the same value z is supplied for both arguments
+            Vector128<float> v1 = MakeVectorE0E1(a, a); // the same value a is supplied for both arguments
+
+            SingleOpcode(opcode, v0: v0, v1: v1);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("INS <Vd>.D[<index1>], <Vn>.D[<index2>]")] // Builds one element-to-element INS opcode per parameter combination, runs it, then calls CompareAgainstUnicorn.
+        public void Ins_V_DD([Values(0u)]     uint rd, // register number ORed into opcode bits 4:0
+                             [Values(1u, 0u)] uint rn, // register number ORed into opcode bits 9:5; the value 0 equals rd
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong z, // 64-bit pattern used to build the initial v0
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong a, // 64-bit pattern used to build v1
+                             [Values(0u, 1u)] uint dstIndex, // index folded into imm5
+                             [Values(0u, 1u)] uint srcIndex) // index folded into imm4
+        {
+            const int size = 3; // shift amount for the imm5/imm4 packing; 3 goes with the D form named in the Description
+
+            uint imm5 = (dstIndex << (size + 1) | 1u << size) & 0x1Fu; // dstIndex placed above a single set bit at position size, masked to 5 bits
+            uint imm4 = (srcIndex << size) & 0xFu; // srcIndex shifted left by size, masked to 4 bits
+
+            uint opcode = 0x6E000400; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+            opcode |= (imm4 << 11); // imm4 lands in bits 14:11
+
+            Vector128<float> v0 = MakeVectorE0E1(z, z); // the same value z is supplied for both arguments
+            Vector128<float> v1 = MakeVectorE0E1(a, a); // the same value a is supplied for both arguments
+
+            SingleOpcode(opcode, v0: v0, v1: v1);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("SMOV <Wd>, <Vn>.B[<index>]")]
+        public void Smov_S_BW([Values(0u, 31u)] uint rd,
+                              [Values(1u)]      uint rn,
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong a,
+                              [Values(0u, 15u)] [Random(1u, 14u, RndCntIndex)] uint index)
+        {
+            const int size = 0;
+
             uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu;
 
             uint opcode = 0x0E002C00; // RESERVED
@@ -303,20 +496,44 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0(a);
+            Vector128<float> v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x0: x0, x31: w31, v1: v1);
 
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("SMOV <Xd>, <Vn>.<Ts>[<index>]")]
-        public void Smov_S_X([Values(0u, 31u)] uint rd,
-                             [Values(1u)]      uint rn,
-                             [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                             [Values(0, 1, 2)] int size, // <B, H, S>
-                             [Values(0u, 1u)] uint index)
+        [Test, Pairwise, Description("SMOV <Wd>, <Vn>.H[<index>]")] // Builds one SMOV opcode per parameter combination, seeds x0/x31/v1, runs it, then calls CompareAgainstUnicorn.
+        public void Smov_S_HW([Values(0u, 31u)] uint rd, // register number ORed into opcode bits 4:0
+                              [Values(1u)]      uint rn, // register number ORed into opcode bits 9:5
+                              [ValueSource("_4H_")] [Random(RndCnt)] ulong a, // 64-bit pattern used to build v1
+                              [Values(0u, 7u)] [Random(1u, 6u, RndCntIndex)] uint index) // element index folded into imm5
         {
+            const int size = 1; // shift amount for the imm5 packing; 1 goes with the H form named in the Description
+
+            uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; // index placed above a single set bit at position size, masked to 5 bits
+
+            uint opcode = 0x0E002C00; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+
+            ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; // random bits in the upper 32 bits only; the low 32 bits are zero
+            uint w31 = TestContext.CurrentContext.Random.NextUInt(); // random 32-bit value handed to SingleOpcode as x31
+            Vector128<float> v1 = MakeVectorE0E1(a, a); // the same value a is supplied for both arguments
+
+            SingleOpcode(opcode, x0: x0, x31: w31, v1: v1);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("SMOV <Xd>, <Vn>.B[<index>]")]
+        public void Smov_S_BX([Values(0u, 31u)] uint rd,
+                              [Values(1u)]      uint rn,
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong a,
+                              [Values(0u, 15u)] [Random(1u, 14u, RndCntIndex)] uint index)
+        {
+            const int size = 0;
+
             uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu;
 
             uint opcode = 0x4E002C00; // RESERVED
@@ -324,20 +541,65 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE0(a);
+            Vector128<float> v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x31: x31, v1: v1);
 
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("UMOV <Wd>, <Vn>.<Ts>[<index>]")]
-        public void Umov_S_W([Values(0u, 31u)] uint rd,
-                             [Values(1u)]      uint rn,
-                             [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                             [Values(0, 1, 2)] int size, // <B, H, S>
-                             [Values(0u, 1u)] uint index)
+        [Test, Pairwise, Description("SMOV <Xd>, <Vn>.H[<index>]")] // Builds one SMOV opcode per parameter combination, seeds x31/v1, runs it, then calls CompareAgainstUnicorn.
+        public void Smov_S_HX([Values(0u, 31u)] uint rd, // register number ORed into opcode bits 4:0
+                              [Values(1u)]      uint rn, // register number ORed into opcode bits 9:5
+                              [ValueSource("_4H_")] [Random(RndCnt)] ulong a, // 64-bit pattern used to build v1
+                              [Values(0u, 7u)] [Random(1u, 6u, RndCntIndex)] uint index) // element index folded into imm5
         {
+            const int size = 1; // shift amount for the imm5 packing; 1 goes with the H form named in the Description
+
+            uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; // index placed above a single set bit at position size, masked to 5 bits
+
+            uint opcode = 0x4E002C00; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+
+            ulong x31 = TestContext.CurrentContext.Random.NextULong(); // random 64-bit value handed to SingleOpcode as x31
+            Vector128<float> v1 = MakeVectorE0E1(a, a); // the same value a is supplied for both arguments
+
+            SingleOpcode(opcode, x31: x31, v1: v1);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("SMOV <Xd>, <Vn>.S[<index>]")] // Builds one SMOV opcode per parameter combination, seeds x31/v1, runs it, then calls CompareAgainstUnicorn.
+        public void Smov_S_SX([Values(0u, 31u)] uint rd, // register number ORed into opcode bits 4:0
+                              [Values(1u)]      uint rn, // register number ORed into opcode bits 9:5
+                              [ValueSource("_2S_")] [Random(RndCnt)] ulong a, // 64-bit pattern used to build v1
+                              [Values(0u, 1u, 2u, 3u)] uint index) // element index folded into imm5; all four values are enumerated
+        {
+            const int size = 2; // shift amount for the imm5 packing; 2 goes with the S form named in the Description
+
+            uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; // index placed above a single set bit at position size, masked to 5 bits
+
+            uint opcode = 0x4E002C00; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+
+            ulong x31 = TestContext.CurrentContext.Random.NextULong(); // random 64-bit value handed to SingleOpcode as x31
+            Vector128<float> v1 = MakeVectorE0E1(a, a); // the same value a is supplied for both arguments
+
+            SingleOpcode(opcode, x31: x31, v1: v1);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("UMOV <Wd>, <Vn>.B[<index>]")]
+        public void Umov_S_BW([Values(0u, 31u)] uint rd,
+                              [Values(1u)]      uint rn,
+                              [ValueSource("_8B_")] [Random(RndCnt)] ulong a,
+                              [Values(0u, 15u)] [Random(1u, 14u, RndCntIndex)] uint index)
+        {
+            const int size = 0;
+
             uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu;
 
             uint opcode = 0x0E003C00; // RESERVED
@@ -346,20 +608,67 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0(a);
+            Vector128<float> v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x0: x0, x31: w31, v1: v1);
 
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("UMOV <Xd>, <Vn>.<Ts>[<index>]")]
-        public void Umov_S_X([Values(0u, 31u)] uint rd,
-                             [Values(1u)]      uint rn,
-                             [ValueSource("_1D_")] [Random(RndCnt)] ulong a,
-                             [Values(3)] int size, // <D>
-                             [Values(0u)] uint index)
+        [Test, Pairwise, Description("UMOV <Wd>, <Vn>.H[<index>]")] // Builds one UMOV opcode per parameter combination, seeds x0/x31/v1, runs it, then calls CompareAgainstUnicorn.
+        public void Umov_S_HW([Values(0u, 31u)] uint rd, // register number ORed into opcode bits 4:0
+                              [Values(1u)]      uint rn, // register number ORed into opcode bits 9:5
+                              [ValueSource("_4H_")] [Random(RndCnt)] ulong a, // 64-bit pattern used to build v1
+                              [Values(0u, 7u)] [Random(1u, 6u, RndCntIndex)] uint index) // element index folded into imm5
         {
+            const int size = 1; // shift amount for the imm5 packing; 1 goes with the H form named in the Description
+
+            uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; // index placed above a single set bit at position size, masked to 5 bits
+
+            uint opcode = 0x0E003C00; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+
+            ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; // random bits in the upper 32 bits only; the low 32 bits are zero
+            uint w31 = TestContext.CurrentContext.Random.NextUInt(); // random 32-bit value handed to SingleOpcode as x31
+            Vector128<float> v1 = MakeVectorE0E1(a, a); // the same value a is supplied for both arguments
+
+            SingleOpcode(opcode, x0: x0, x31: w31, v1: v1);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("UMOV <Wd>, <Vn>.S[<index>]")] // Builds one UMOV opcode per parameter combination, seeds x0/x31/v1, runs it, then calls CompareAgainstUnicorn.
+        public void Umov_S_SW([Values(0u, 31u)] uint rd, // register number ORed into opcode bits 4:0
+                              [Values(1u)]      uint rn, // register number ORed into opcode bits 9:5
+                              [ValueSource("_2S_")] [Random(RndCnt)] ulong a, // 64-bit pattern used to build v1
+                              [Values(0u, 1u, 2u, 3u)] uint index) // element index folded into imm5; all four values are enumerated
+        {
+            const int size = 2; // shift amount for the imm5 packing; 2 goes with the S form named in the Description
+
+            uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; // index placed above a single set bit at position size, masked to 5 bits
+
+            uint opcode = 0x0E003C00; // RESERVED
+            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); // both register numbers are masked to 5 bits
+            opcode |= (imm5 << 16); // imm5 lands in bits 20:16
+
+            ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; // random bits in the upper 32 bits only; the low 32 bits are zero
+            uint w31 = TestContext.CurrentContext.Random.NextUInt(); // random 32-bit value handed to SingleOpcode as x31
+            Vector128<float> v1 = MakeVectorE0E1(a, a); // the same value a is supplied for both arguments
+
+            SingleOpcode(opcode, x0: x0, x31: w31, v1: v1);
+
+            CompareAgainstUnicorn(); // helper not visible in this hunk; presumably checks state against a Unicorn run - TODO confirm
+        }
+
+        [Test, Pairwise, Description("UMOV <Xd>, <Vn>.D[<index>]")]
+        public void Umov_S_DX([Values(0u, 31u)] uint rd,
+                              [Values(1u)]      uint rn,
+                              [ValueSource("_1D_")] [Random(RndCnt)] ulong a,
+                              [Values(0u, 1u)] uint index)
+        {
+            const int size = 3;
+
             uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu;
 
             uint opcode = 0x4E003C00; // RESERVED
@@ -367,7 +676,7 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE0(a);
+            Vector128<float> v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x31: x31, v1: v1);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
index 1c418341b6..d9b8280135 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
@@ -202,7 +202,7 @@ namespace Ryujinx.Tests.Cpu
 #endregion
 
 #region "ValueSource (Opcodes)"
-        private static uint[] _F_Add_Div_Mul_Mulx_Sub_S_S_()
+        private static uint[] _F_Abd_Add_Div_Mul_Mulx_Nmul_Sub_S_S_()
         {
             return new uint[]
             {
@@ -211,11 +211,12 @@ namespace Ryujinx.Tests.Cpu
                 0x1E221820u, // FDIV  S0, S1, S2
                 0x1E220820u, // FMUL  S0, S1, S2
                 0x5E22DC20u, // FMULX S0, S1, S2
+                0x1E228820u, // FNMUL S0, S1, S2
                 0x1E223820u  // FSUB  S0, S1, S2
             };
         }
 
-        private static uint[] _F_Add_Div_Mul_Mulx_Sub_S_D_()
+        private static uint[] _F_Abd_Add_Div_Mul_Mulx_Nmul_Sub_S_D_()
         {
             return new uint[]
             {
@@ -224,11 +225,12 @@ namespace Ryujinx.Tests.Cpu
                 0x1E621820u, // FDIV  D0, D1, D2
                 0x1E620820u, // FMUL  D0, D1, D2
                 0x5E62DC20u, // FMULX D0, D1, D2
+                0x1E628820u, // FNMUL D0, D1, D2
                 0x1E623820u  // FSUB  D0, D1, D2
             };
         }
 
-        private static uint[] _F_Add_Div_Mul_Mulx_Sub_P_V_2S_4S_()
+        private static uint[] _F_Abd_Add_Div_Mul_Mulx_Sub_P_V_2S_4S_()
         {
             return new uint[]
             {
@@ -242,7 +244,7 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
-        private static uint[] _F_Add_Div_Mul_Mulx_Sub_P_V_2D_()
+        private static uint[] _F_Abd_Add_Div_Mul_Mulx_Sub_P_V_2D_()
         {
             return new uint[]
             {
@@ -314,21 +316,25 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
-        private static uint[] _F_Madd_Msub_S_S_()
+        private static uint[] _F_Madd_Msub_Nmadd_Nmsub_S_S_()
         {
             return new uint[]
             {
-                0x1F020C20u, // FMADD S0, S1, S2, S3
-                0x1F028C20u  // FMSUB S0, S1, S2, S3
+                0x1F020C20u, // FMADD  S0, S1, S2, S3
+                0x1F028C20u, // FMSUB  S0, S1, S2, S3
+                0x1F220C20u, // FNMADD S0, S1, S2, S3
+                0x1F228C20u  // FNMSUB S0, S1, S2, S3
             };
         }
 
-        private static uint[] _F_Madd_Msub_S_D_()
+        private static uint[] _F_Madd_Msub_Nmadd_Nmsub_S_D_()
         {
             return new uint[]
             {
-                0x1F420C20u, // FMADD D0, D1, D2, D3
-                0x1F428C20u  // FMSUB D0, D1, D2, D3
+                0x1F420C20u, // FMADD  D0, D1, D2, D3
+                0x1F428C20u, // FMSUB  D0, D1, D2, D3
+                0x1F620C20u, // FNMADD D0, D1, D2, D3
+                0x1F628C20u  // FNMSUB D0, D1, D2, D3
             };
         }
 
@@ -475,14 +481,44 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
-        private static uint[] _S_Max_Min_P_V_()
+        private static uint[] _SU_Max_Min_P_V_()
         {
             return new uint[]
             {
                 0x0E206400u, // SMAX  V0.8B, V0.8B, V0.8B
                 0x0E20A400u, // SMAXP V0.8B, V0.8B, V0.8B
                 0x0E206C00u, // SMIN  V0.8B, V0.8B, V0.8B
-                0x0E20AC00u  // SMINP V0.8B, V0.8B, V0.8B
+                0x0E20AC00u, // SMINP V0.8B, V0.8B, V0.8B
+                0x2E206400u, // UMAX  V0.8B, V0.8B, V0.8B
+                0x2E20A400u, // UMAXP V0.8B, V0.8B, V0.8B
+                0x2E206C00u, // UMIN  V0.8B, V0.8B, V0.8B
+                0x2E20AC00u  // UMINP V0.8B, V0.8B, V0.8B
+            };
+        }
+
+        private static uint[] _SU_Mlal_Mlsl_Mull_V_8B8H_4H4S_2S2D_() // Returns six base opcodes, three signed then three unsigned; presumably consumed by a [ValueSource] test outside this hunk - TODO confirm
+        {
+            return new uint[] // a fresh array is allocated on every call
+            {
+                0x0E208000u, // SMLAL V0.8H, V0.8B, V0.8B
+                0x0E20A000u, // SMLSL V0.8H, V0.8B, V0.8B
+                0x0E20C000u, // SMULL V0.8H, V0.8B, V0.8B
+                0x2E208000u, // UMLAL V0.8H, V0.8B, V0.8B
+                0x2E20A000u, // UMLSL V0.8H, V0.8B, V0.8B
+                0x2E20C000u  // UMULL V0.8H, V0.8B, V0.8B
+            };
+        }
+
+        private static uint[] _SU_Mlal_Mlsl_Mull_V_16B8H_8H4S_4S2D_() // Returns six base opcodes for the "2" mnemonics, three signed then three unsigned; presumably consumed by a [ValueSource] test outside this hunk - TODO confirm
+        {
+            return new uint[] // a fresh array is allocated on every call
+            {
+                0x4E208000u, // SMLAL2 V0.8H, V0.16B, V0.16B
+                0x4E20A000u, // SMLSL2 V0.8H, V0.16B, V0.16B
+                0x4E20C000u, // SMULL2 V0.8H, V0.16B, V0.16B
+                0x6E208000u, // UMLAL2 V0.8H, V0.16B, V0.16B
+                0x6E20A000u, // UMLSL2 V0.8H, V0.16B, V0.16B
+                0x6E20C000u  // UMULL2 V0.8H, V0.16B, V0.16B
             };
         }
 
@@ -515,17 +551,6 @@ namespace Ryujinx.Tests.Cpu
                 0x6E204400u  // USHL   V0.16B, V0.16B, V0.16B
             };
         }
-
-        private static uint[] _U_Max_Min_P_V_()
-        {
-            return new uint[]
-            {
-                0x2E206400u, // UMAX  V0.8B, V0.8B, V0.8B
-                0x2E20A400u, // UMAXP V0.8B, V0.8B, V0.8B
-                0x2E206C00u, // UMIN  V0.8B, V0.8B, V0.8B
-                0x2E20AC00u  // UMINP V0.8B, V0.8B, V0.8B
-            };
-        }
 #endregion
 
         private const int RndCnt = 2;
@@ -1311,9 +1336,9 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise] [Explicit]
-        public void F_Add_Div_Mul_Mulx_Sub_S_S([ValueSource("_F_Add_Div_Mul_Mulx_Sub_S_S_")] uint opcodes,
-                                               [ValueSource("_1S_F_")] ulong a,
-                                               [ValueSource("_1S_F_")] ulong b)
+        public void F_Abd_Add_Div_Mul_Mulx_Nmul_Sub_S_S([ValueSource("_F_Abd_Add_Div_Mul_Mulx_Nmul_Sub_S_S_")] uint opcodes,
+                                                        [ValueSource("_1S_F_")] ulong a,
+                                                        [ValueSource("_1S_F_")] ulong b)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
             Vector128<float> v0 = MakeVectorE0E1(z, z);
@@ -1331,9 +1356,9 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise] [Explicit]
-        public void F_Add_Div_Mul_Mulx_Sub_S_D([ValueSource("_F_Add_Div_Mul_Mulx_Sub_S_D_")] uint opcodes,
-                                               [ValueSource("_1D_F_")] ulong a,
-                                               [ValueSource("_1D_F_")] ulong b)
+        public void F_Abd_Add_Div_Mul_Mulx_Nmul_Sub_S_D([ValueSource("_F_Abd_Add_Div_Mul_Mulx_Nmul_Sub_S_D_")] uint opcodes,
+                                                        [ValueSource("_1D_F_")] ulong a,
+                                                        [ValueSource("_1D_F_")] ulong b)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
             Vector128<float> v0 = MakeVectorE1(z);
@@ -1351,14 +1376,14 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise] [Explicit]
-        public void F_Add_Div_Mul_Mulx_Sub_P_V_2S_4S([ValueSource("_F_Add_Div_Mul_Mulx_Sub_P_V_2S_4S_")] uint opcodes,
-                                                     [Values(0u)]     uint rd,
-                                                     [Values(1u, 0u)] uint rn,
-                                                     [Values(2u, 0u)] uint rm,
-                                                     [ValueSource("_2S_F_")] ulong z,
-                                                     [ValueSource("_2S_F_")] ulong a,
-                                                     [ValueSource("_2S_F_")] ulong b,
-                                                     [Values(0b0u, 0b1u)] uint q) // <2S, 4S>
+        public void F_Abd_Add_Div_Mul_Mulx_Sub_P_V_2S_4S([ValueSource("_F_Abd_Add_Div_Mul_Mulx_Sub_P_V_2S_4S_")] uint opcodes,
+                                                         [Values(0u)]     uint rd,
+                                                         [Values(1u, 0u)] uint rn,
+                                                         [Values(2u, 0u)] uint rm,
+                                                         [ValueSource("_2S_F_")] ulong z,
+                                                         [ValueSource("_2S_F_")] ulong a,
+                                                         [ValueSource("_2S_F_")] ulong b,
+                                                         [Values(0b0u, 0b1u)] uint q) // <2S, 4S>
         {
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
@@ -1378,13 +1403,13 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise] [Explicit]
-        public void F_Add_Div_Mul_Mulx_Sub_P_V_2D([ValueSource("_F_Add_Div_Mul_Mulx_Sub_P_V_2D_")] uint opcodes,
-                                                  [Values(0u)]     uint rd,
-                                                  [Values(1u, 0u)] uint rn,
-                                                  [Values(2u, 0u)] uint rm,
-                                                  [ValueSource("_1D_F_")] ulong z,
-                                                  [ValueSource("_1D_F_")] ulong a,
-                                                  [ValueSource("_1D_F_")] ulong b)
+        public void F_Abd_Add_Div_Mul_Mulx_Sub_P_V_2D([ValueSource("_F_Abd_Add_Div_Mul_Mulx_Sub_P_V_2D_")] uint opcodes,
+                                                      [Values(0u)]     uint rd,
+                                                      [Values(1u, 0u)] uint rn,
+                                                      [Values(2u, 0u)] uint rm,
+                                                      [ValueSource("_1D_F_")] ulong z,
+                                                      [ValueSource("_1D_F_")] ulong a,
+                                                      [ValueSource("_1D_F_")] ulong b)
         {
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
@@ -1527,10 +1552,10 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise] [Explicit] // Fused.
-        public void F_Madd_Msub_S_S([ValueSource("_F_Madd_Msub_S_S_")] uint opcodes,
-                                    [ValueSource("_1S_F_")] ulong a,
-                                    [ValueSource("_1S_F_")] ulong b,
-                                    [ValueSource("_1S_F_")] ulong c)
+        public void F_Madd_Msub_Nmadd_Nmsub_S_S([ValueSource("_F_Madd_Msub_Nmadd_Nmsub_S_S_")] uint opcodes,
+                                                [ValueSource("_1S_F_")] ulong a,
+                                                [ValueSource("_1S_F_")] ulong b,
+                                                [ValueSource("_1S_F_")] ulong c)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
             Vector128<float> v0 = MakeVectorE0E1(z, z);
@@ -1549,10 +1574,10 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise] [Explicit] // Fused.
-        public void F_Madd_Msub_S_D([ValueSource("_F_Madd_Msub_S_D_")] uint opcodes,
-                                    [ValueSource("_1D_F_")] ulong a,
-                                    [ValueSource("_1D_F_")] ulong b,
-                                    [ValueSource("_1D_F_")] ulong c)
+        public void F_Madd_Msub_Nmadd_Nmsub_S_D([ValueSource("_F_Madd_Msub_Nmadd_Nmsub_S_D_")] uint opcodes,
+                                                [ValueSource("_1D_F_")] ulong a,
+                                                [ValueSource("_1D_F_")] ulong b,
+                                                [ValueSource("_1D_F_")] ulong c)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
             Vector128<float> v0 = MakeVectorE1(z);
@@ -2411,15 +2436,15 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise]
-        public void S_Max_Min_P_V([ValueSource("_S_Max_Min_P_V_")] uint opcodes,
-                                  [Values(0u)]     uint rd,
-                                  [Values(1u, 0u)] uint rn,
-                                  [Values(2u, 0u)] uint rm,
-                                  [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
-                                  [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                                  [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
-                                  [Values(0b00u, 0b01u, 0b10u)] uint size, // Q0: <8B,  4H, 2S>
-                                  [Values(0b0u, 0b1u)] uint q)             // Q1: <16B, 8H, 4S>
+        public void SU_Max_Min_P_V([ValueSource("_SU_Max_Min_P_V_")] uint opcodes,
+                                   [Values(0u)]     uint rd,
+                                   [Values(1u, 0u)] uint rn,
+                                   [Values(2u, 0u)] uint rm,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
+                                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
+                                   [Values(0b00u, 0b01u, 0b10u)] uint size, // Q0: <8B,  4H, 2S>
+                                   [Values(0b0u, 0b1u)] uint q)             // Q1: <16B, 8H, 4S>
         {
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((size & 3) << 22);
@@ -2434,90 +2459,46 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("SMLAL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Smlal_V_8B8H_4H4S_2S2D([Values(0u)]     uint rd,
-                                           [Values(1u, 0u)] uint rn,
-                                           [Values(2u, 0u)] uint rm,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
-                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D>
+        [Test, Pairwise] // Base encoding comes from the "opcodes" value source; register and size fields are OR-ed in below.
+        public void SU_Mlal_Mlsl_Mull_V_8B8H_4H4S_2S2D([ValueSource("_SU_Mlal_Mlsl_Mull_V_8B8H_4H4S_2S2D_")] uint opcodes,
+                                                       [Values(0u)]     uint rd,
+                                                       [Values(1u, 0u)] uint rn,
+                                                       [Values(2u, 0u)] uint rm,
+                                                       [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
+                                                       [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
+                                                       [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
+                                                       [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D>
         {
-            uint opcode = 0x0E208000; // SMLAL V0.8H, V0.8B, V0.8B
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= ((size & 3) << 22);
+            opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); // rm -> bits [20:16], rn -> bits [9:5], rd -> bits [4:0]
+            opcodes |= ((size & 3) << 22); // size -> bits [23:22]
 
             Vector128<float> v0 = MakeVectorE0E1(z, z);
             Vector128<float> v1 = MakeVectorE0(a);
             Vector128<float> v2 = MakeVectorE0(b);
 
-            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+            SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); // v0 is seeded from z, v1 from a, v2 from b
 
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("SMLAL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Smlal_V_16B8H_8H4S_4S2D([Values(0u)]     uint rd,
-                                            [Values(1u, 0u)] uint rn,
-                                            [Values(2u, 0u)] uint rm,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
-                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D>
+        [Test, Pairwise] // Base encoding comes from the "opcodes" value source; register and size fields are OR-ed in below.
+        public void SU_Mlal_Mlsl_Mull_V_16B8H_8H4S_4S2D([ValueSource("_SU_Mlal_Mlsl_Mull_V_16B8H_8H4S_4S2D_")] uint opcodes,
+                                                        [Values(0u)]     uint rd,
+                                                        [Values(1u, 0u)] uint rn,
+                                                        [Values(2u, 0u)] uint rm,
+                                                        [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
+                                                        [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
+                                                        [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
+                                                        [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D>
         {
-            uint opcode = 0x4E208000; // SMLAL2 V0.8H, V0.16B, V0.16B
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= ((size & 3) << 22);
+            opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); // rm -> bits [20:16], rn -> bits [9:5], rd -> bits [4:0]
+            opcodes |= ((size & 3) << 22); // size -> bits [23:22]
 
             Vector128<float> v0 = MakeVectorE0E1(z, z);
             Vector128<float> v1 = MakeVectorE1(a);
             Vector128<float> v2 = MakeVectorE1(b);
 
-            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("SMLSL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Smlsl_V_8B8H_4H4S_2S2D([Values(0u)]     uint rd,
-                                           [Values(1u, 0u)] uint rn,
-                                           [Values(2u, 0u)] uint rm,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
-                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D>
-        {
-            uint opcode = 0x0E20A000; // SMLSL V0.8H, V0.8B, V0.8B
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= ((size & 3) << 22);
-
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
-
-            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("SMLSL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Smlsl_V_16B8H_8H4S_4S2D([Values(0u)]     uint rd,
-                                            [Values(1u, 0u)] uint rn,
-                                            [Values(2u, 0u)] uint rm,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
-                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D>
-        {
-            uint opcode = 0x4E20A000; // SMLSL2 V0.8H, V0.16B, V0.16B
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= ((size & 3) << 22);
-
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
-
-            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+            SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); // v0 is seeded from z, v1 from a, v2 from b
 
             CompareAgainstUnicorn();
         }
@@ -3105,9 +3086,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
+            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
+            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3149,9 +3130,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
+            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
+            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3510,118 +3491,6 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise]
-        public void U_Max_Min_P_V([ValueSource("_U_Max_Min_P_V_")] uint opcodes,
-                                  [Values(0u)]     uint rd,
-                                  [Values(1u, 0u)] uint rn,
-                                  [Values(2u, 0u)] uint rm,
-                                  [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
-                                  [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                                  [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
-                                  [Values(0b00u, 0b01u, 0b10u)] uint size, // Q0: <8B,  4H, 2S>
-                                  [Values(0b0u, 0b1u)] uint q)             // Q1: <16B, 8H, 4S>
-        {
-            opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcodes |= ((size & 3) << 22);
-            opcodes |= ((q & 1) << 30);
-
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * q);
-
-            SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("UMLAL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Umlal_V_8B8H_4H4S_2S2D([Values(0u)]     uint rd,
-                                           [Values(1u, 0u)] uint rn,
-                                           [Values(2u, 0u)] uint rm,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
-                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D>
-        {
-            uint opcode = 0x2E208000; // UMLAL V0.8H, V0.8B, V0.8B
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= ((size & 3) << 22);
-
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
-
-            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("UMLAL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Umlal_V_16B8H_8H4S_4S2D([Values(0u)]     uint rd,
-                                            [Values(1u, 0u)] uint rn,
-                                            [Values(2u, 0u)] uint rm,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
-                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D>
-        {
-            uint opcode = 0x6E208000; // UMLAL2 V0.8H, V0.16B, V0.16B
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= ((size & 3) << 22);
-
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
-
-            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("UMLSL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Umlsl_V_8B8H_4H4S_2S2D([Values(0u)]     uint rd,
-                                           [Values(1u, 0u)] uint rn,
-                                           [Values(2u, 0u)] uint rm,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
-                                           [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D>
-        {
-            uint opcode = 0x2E20A000; // UMLSL V0.8H, V0.8B, V0.8B
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= ((size & 3) << 22);
-
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
-
-            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("UMLSL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
-        public void Umlsl_V_16B8H_8H4S_4S2D([Values(0u)]     uint rd,
-                                            [Values(1u, 0u)] uint rn,
-                                            [Values(2u, 0u)] uint rm,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
-                                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
-                                            [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D>
-        {
-            uint opcode = 0x6E20A000; // UMLSL2 V0.8H, V0.16B, V0.16B
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= ((size & 3) << 22);
-
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
-
-            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
-
-            CompareAgainstUnicorn();
-        }
-
         [Test, Pairwise, Description("UQADD <V><d>, <V><n>, <V><m>")]
         public void Uqadd_S_B_H_S_D([Values(0u)]     uint rd,
                                     [Values(1u, 0u)] uint rn,
@@ -3921,9 +3790,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
+            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
+            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3965,9 +3834,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
+            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
+            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -4009,9 +3878,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
+            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
+            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -4053,9 +3922,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
+            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
+            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs
index 7fc593a849..64f9bc6cc5 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs
@@ -73,7 +73,8 @@ namespace Ryujinx.Tests.Cpu
         }
 #endregion
 
-        private const int RndCnt = 2;
+        private const int RndCnt      = 2; // Sample count passed to [Random(RndCnt)] on the z/a/b operand parameters.
+        private const int RndCntIndex = 2; // Sample count passed to [Random(1u, 6u, RndCntIndex)] on the 'index' parameters.
 
         [Test, Pairwise]
         public void Mla_Mls_Mul_Ve_4H_8H([ValueSource("_Mla_Mls_Mul_Ve_4H_8H_")] uint opcodes,
@@ -83,7 +84,7 @@ namespace Ryujinx.Tests.Cpu
                                          [ValueSource("_4H_")] [Random(RndCnt)] ulong z,
                                          [ValueSource("_4H_")] [Random(RndCnt)] ulong a,
                                          [ValueSource("_4H_")] [Random(RndCnt)] ulong b,
-                                         [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint index,
+                                         [Values(0u, 7u)] [Random(1u, 6u, RndCntIndex)] uint index,
                                          [Values(0b0u, 0b1u)] uint q) // <4H, 8H>
         {
             uint h = (index >> 2) & 1;
@@ -138,7 +139,7 @@ namespace Ryujinx.Tests.Cpu
                                                    [ValueSource("_4H_")] [Random(RndCnt)] ulong z,
                                                    [ValueSource("_4H_")] [Random(RndCnt)] ulong a,
                                                    [ValueSource("_4H_")] [Random(RndCnt)] ulong b,
-                                                   [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint index,
+                                                   [Values(0u, 7u)] [Random(1u, 6u, RndCntIndex)] uint index,
                                                    [Values(0b0u, 0b1u)] uint q) // <4H4S, 8H4S>
         {
             uint h = (index >> 2) & 1;
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs
index c08949a5ae..54ed044d9f 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs
@@ -212,6 +212,51 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
+        private static uint[] _Shl_Sli_S_D_() // Value source for Shl_Sli_S_D; that test ORs in the register and shift fields.
+        {
+            return new uint[]
+            {
+                0x5F405400u, // SHL D0, D0, #0
+                //0x7F405400u  // SLI D0, D0, #0 -- NOTE(review): disabled; the reason is not recorded here, confirm before re-enabling.
+            };
+        }
+
+        private static uint[] _Shl_Sli_V_8B_16B_() // Value source for Shl_Sli_V_8B_16B; that test ORs in the register, shift and q fields.
+        {
+            return new uint[]
+            {
+                0x0F085400u, // SHL V0.8B, V0.8B, #0
+                0x2F085400u  // SLI V0.8B, V0.8B, #0
+            };
+        }
+
+        private static uint[] _Shl_Sli_V_4H_8H_() // Value source for Shl_Sli_V_4H_8H; that test ORs in the register, shift and q fields.
+        {
+            return new uint[]
+            {
+                0x0F105400u, // SHL V0.4H, V0.4H, #0
+                0x2F105400u  // SLI V0.4H, V0.4H, #0
+            };
+        }
+
+        private static uint[] _Shl_Sli_V_2S_4S_() // Value source for Shl_Sli_V_2S_4S; that test ORs in the register, shift and q fields.
+        {
+            return new uint[]
+            {
+                0x0F205400u, // SHL V0.2S, V0.2S, #0
+                0x2F205400u  // SLI V0.2S, V0.2S, #0
+            };
+        }
+
+        private static uint[] _Shl_Sli_V_2D_() // Value source for Shl_Sli_V_2D; that test ORs in the register and shift fields.
+        {
+            return new uint[]
+            {
+                0x4F405400u, // SHL V0.2D, V0.2D, #0
+                0x6F405400u  // SLI V0.2D, V0.2D, #0
+            };
+        }
+
         private static uint[] _SU_Shll_V_8B8H_16B8H_()
         {
             return new uint[]
@@ -516,113 +561,113 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn(fpTolerances: FpTolerances.UpToOneUlpsD); // unsigned
         }
 
-        [Test, Pairwise, Description("SHL <V><d>, <V><n>, #<shift>")]
-        public void Shl_S_D([Values(0u)]     uint rd,
-                            [Values(1u, 0u)] uint rn,
-                            [ValueSource("_1D_")] [Random(RndCnt)] ulong z,
-                            [ValueSource("_1D_")] [Random(RndCnt)] ulong a,
-                            [Values(0u, 63u)] [Random(1u, 62u, RndCntShift)] uint shift)
+        [Test, Pairwise] // Base encoding comes from the "opcodes" value source; register and shift fields are OR-ed in below.
+        public void Shl_Sli_S_D([ValueSource("_Shl_Sli_S_D_")] uint opcodes,
+                                [Values(0u)]     uint rd,
+                                [Values(1u, 0u)] uint rn,
+                                [ValueSource("_1D_")] [Random(RndCnt)] ulong z,
+                                [ValueSource("_1D_")] [Random(RndCnt)] ulong a,
+                                [Values(0u, 63u)] [Random(1u, 62u, RndCntShift)] uint shift)
         {
             uint immHb = (64 + shift) & 0x7F;
 
-            uint opcode = 0x5F405400; // SHL D0, D0, #0
-            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= (immHb << 16);
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // rn -> bits [9:5], rd -> bits [4:0]
+            opcodes |= (immHb << 16); // 7-bit immHb -> bits [22:16]
 
             Vector128<float> v0 = MakeVectorE0E1(z, z);
             Vector128<float> v1 = MakeVectorE0(a);
 
-            SingleOpcode(opcode, v0: v0, v1: v1);
+            SingleOpcode(opcodes, v0: v0, v1: v1); // v0 is seeded from z, v1 from a
 
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("SHL <Vd>.<T>, <Vn>.<T>, #<shift>")]
-        public void Shl_V_8B_16B([Values(0u)]     uint rd,
-                                 [Values(1u, 0u)] uint rn,
-                                 [ValueSource("_8B_")] [Random(RndCnt)] ulong z,
-                                 [ValueSource("_8B_")] [Random(RndCnt)] ulong a,
-                                 [Values(0u, 7u)] [Random(1u, 6u, RndCntShift)] uint shift,
-                                 [Values(0b0u, 0b1u)] uint q) // <8B, 16B>
+        [Test, Pairwise] // Base encoding comes from the "opcodes" value source; register, shift and q fields are OR-ed in below.
+        public void Shl_Sli_V_8B_16B([ValueSource("_Shl_Sli_V_8B_16B_")] uint opcodes,
+                                     [Values(0u)]     uint rd,
+                                     [Values(1u, 0u)] uint rn,
+                                     [ValueSource("_8B_")] [Random(RndCnt)] ulong z,
+                                     [ValueSource("_8B_")] [Random(RndCnt)] ulong a,
+                                     [Values(0u, 7u)] [Random(1u, 6u, RndCntShift)] uint shift,
+                                     [Values(0b0u, 0b1u)] uint q) // <8B, 16B>
         {
             uint immHb = (8 + shift) & 0x7F;
 
-            uint opcode = 0x0F085400; // SHL V0.8B, V0.8B, #0
-            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= (immHb << 16);
-            opcode |= ((q & 1) << 30);
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // rn -> bits [9:5], rd -> bits [4:0]
+            opcodes |= (immHb << 16); // 7-bit immHb -> bits [22:16]
+            opcodes |= ((q & 1) << 30); // q -> bit 30
 
             Vector128<float> v0 = MakeVectorE0E1(z, z);
             Vector128<float> v1 = MakeVectorE0E1(a, a * q);
 
-            SingleOpcode(opcode, v0: v0, v1: v1);
+            SingleOpcode(opcodes, v0: v0, v1: v1); // v0 is seeded from z, v1 from a
 
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("SHL <Vd>.<T>, <Vn>.<T>, #<shift>")]
-        public void Shl_V_4H_8H([Values(0u)]     uint rd,
-                                [Values(1u, 0u)] uint rn,
-                                [ValueSource("_4H_")] [Random(RndCnt)] ulong z,
-                                [ValueSource("_4H_")] [Random(RndCnt)] ulong a,
-                                [Values(0u, 15u)] [Random(1u, 14u, RndCntShift)] uint shift,
-                                [Values(0b0u, 0b1u)] uint q) // <4H, 8H>
+        [Test, Pairwise] // Base encoding comes from the "opcodes" value source; register, shift and q fields are OR-ed in below.
+        public void Shl_Sli_V_4H_8H([ValueSource("_Shl_Sli_V_4H_8H_")] uint opcodes,
+                                    [Values(0u)]     uint rd,
+                                    [Values(1u, 0u)] uint rn,
+                                    [ValueSource("_4H_")] [Random(RndCnt)] ulong z,
+                                    [ValueSource("_4H_")] [Random(RndCnt)] ulong a,
+                                    [Values(0u, 15u)] [Random(1u, 14u, RndCntShift)] uint shift,
+                                    [Values(0b0u, 0b1u)] uint q) // <4H, 8H>
         {
             uint immHb = (16 + shift) & 0x7F;
 
-            uint opcode = 0x0F105400; // SHL V0.4H, V0.4H, #0
-            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= (immHb << 16);
-            opcode |= ((q & 1) << 30);
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // rn -> bits [9:5], rd -> bits [4:0]
+            opcodes |= (immHb << 16); // 7-bit immHb -> bits [22:16]
+            opcodes |= ((q & 1) << 30); // q -> bit 30
 
             Vector128<float> v0 = MakeVectorE0E1(z, z);
             Vector128<float> v1 = MakeVectorE0E1(a, a * q);
 
-            SingleOpcode(opcode, v0: v0, v1: v1);
+            SingleOpcode(opcodes, v0: v0, v1: v1); // v0 is seeded from z, v1 from a
 
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("SHL <Vd>.<T>, <Vn>.<T>, #<shift>")]
-        public void Shl_V_2S_4S([Values(0u)]     uint rd,
-                                [Values(1u, 0u)] uint rn,
-                                [ValueSource("_2S_")] [Random(RndCnt)] ulong z,
-                                [ValueSource("_2S_")] [Random(RndCnt)] ulong a,
-                                [Values(0u, 31u)] [Random(1u, 30u, RndCntShift)] uint shift,
-                                [Values(0b0u, 0b1u)] uint q) // <2S, 4S>
+        [Test, Pairwise] // Base encoding comes from the "opcodes" value source; register, shift and q fields are OR-ed in below.
+        public void Shl_Sli_V_2S_4S([ValueSource("_Shl_Sli_V_2S_4S_")] uint opcodes,
+                                    [Values(0u)]     uint rd,
+                                    [Values(1u, 0u)] uint rn,
+                                    [ValueSource("_2S_")] [Random(RndCnt)] ulong z,
+                                    [ValueSource("_2S_")] [Random(RndCnt)] ulong a,
+                                    [Values(0u, 31u)] [Random(1u, 30u, RndCntShift)] uint shift,
+                                    [Values(0b0u, 0b1u)] uint q) // <2S, 4S>
         {
             uint immHb = (32 + shift) & 0x7F;
 
-            uint opcode = 0x0F205400; // SHL V0.2S, V0.2S, #0
-            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= (immHb << 16);
-            opcode |= ((q & 1) << 30);
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // rn -> bits [9:5], rd -> bits [4:0]
+            opcodes |= (immHb << 16); // 7-bit immHb -> bits [22:16]
+            opcodes |= ((q & 1) << 30); // q -> bit 30
 
             Vector128<float> v0 = MakeVectorE0E1(z, z);
             Vector128<float> v1 = MakeVectorE0E1(a, a * q);
 
-            SingleOpcode(opcode, v0: v0, v1: v1);
+            SingleOpcode(opcodes, v0: v0, v1: v1); // v0 is seeded from z, v1 from a
 
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("SHL <Vd>.<T>, <Vn>.<T>, #<shift>")]
-        public void Shl_V_2D([Values(0u)]     uint rd,
-                             [Values(1u, 0u)] uint rn,
-                             [ValueSource("_1D_")] [Random(RndCnt)] ulong z,
-                             [ValueSource("_1D_")] [Random(RndCnt)] ulong a,
-                             [Values(0u, 63u)] [Random(1u, 62u, RndCntShift)] uint shift)
+        [Test, Pairwise] // Base encoding comes from the "opcodes" value source; register and shift fields are OR-ed in below.
+        public void Shl_Sli_V_2D([ValueSource("_Shl_Sli_V_2D_")] uint opcodes,
+                                 [Values(0u)]     uint rd,
+                                 [Values(1u, 0u)] uint rn,
+                                 [ValueSource("_1D_")] [Random(RndCnt)] ulong z,
+                                 [ValueSource("_1D_")] [Random(RndCnt)] ulong a,
+                                 [Values(0u, 63u)] [Random(1u, 62u, RndCntShift)] uint shift)
         {
             uint immHb = (64 + shift) & 0x7F;
 
-            uint opcode = 0x4F405400; // SHL V0.2D, V0.2D, #0
-            opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
-            opcode |= (immHb << 16);
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); // rn -> bits [9:5], rd -> bits [4:0]
+            opcodes |= (immHb << 16); // 7-bit immHb -> bits [22:16]
 
             Vector128<float> v0 = MakeVectorE0E1(z, z);
             Vector128<float> v1 = MakeVectorE0E1(a, a);
 
-            SingleOpcode(opcode, v0: v0, v1: v1);
+            SingleOpcode(opcodes, v0: v0, v1: v1); // v0 is seeded from z, v1 from a
 
             CompareAgainstUnicorn();
         }