diff --git a/ChocolArm64/ABitUtils.cs b/ChocolArm64/ABitUtils.cs
index 357dd45d15..dd41623564 100644
--- a/ChocolArm64/ABitUtils.cs
+++ b/ChocolArm64/ABitUtils.cs
@@ -27,6 +27,10 @@ namespace ChocolArm64
             return -1;
         }
 
+        private static readonly sbyte[] HbsNibbleTbl = { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 }; // Entry i = index of the highest set bit of the 4-bit value i; -1 for i == 0.
+
+        public static int HighestBitSetNibble(int Value) => HbsNibbleTbl[Value & 0b1111]; // Table lookup; only the low 4 bits of Value are considered.
+
         public static long Replicate(long Bits, int Size)
         {
             long Output = 0;
@@ -54,4 +58,4 @@ namespace ChocolArm64
             return Value != 0 && (Value & (Value - 1)) == 0;
         }
     }
-}
\ No newline at end of file
+}
diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs
index a73466ae11..b053334f3c 100644
--- a/ChocolArm64/AOpCodeTable.cs
+++ b/ChocolArm64/AOpCodeTable.cs
@@ -343,6 +343,7 @@ namespace ChocolArm64
             SetA64("0x001110<<1xxxxx100101xxxxxxxxxx", AInstEmit.Mla_V,         typeof(AOpCodeSimdReg));
             SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", AInstEmit.Mla_Ve,        typeof(AOpCodeSimdRegElem));
             SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", AInstEmit.Mls_V,         typeof(AOpCodeSimdReg));
+            SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", AInstEmit.Mls_Ve,        typeof(AOpCodeSimdRegElem));
             SetA64("0x00111100000xxx0xx001xxxxxxxxxx", AInstEmit.Movi_V,        typeof(AOpCodeSimdImm));
             SetA64("0x00111100000xxx10x001xxxxxxxxxx", AInstEmit.Movi_V,        typeof(AOpCodeSimdImm));
             SetA64("0x00111100000xxx110x01xxxxxxxxxx", AInstEmit.Movi_V,        typeof(AOpCodeSimdImm));
@@ -380,8 +381,9 @@ namespace ChocolArm64
             SetA64("0101111000101000001010xxxxxxxxxx", AInstEmit.Sha256su0_V,   typeof(AOpCodeSimd));
             SetA64("01011110000xxxxx011000xxxxxxxxxx", AInstEmit.Sha256su1_V,   typeof(AOpCodeSimdReg));
             SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", AInstEmit.Shadd_V,       typeof(AOpCodeSimdReg));
-            SetA64("010111110>>>>xxx010101xxxxxxxxxx", AInstEmit.Shl_S,         typeof(AOpCodeSimdShImm));
-            SetA64("0x0011110>>>>xxx010101xxxxxxxxxx", AInstEmit.Shl_V,         typeof(AOpCodeSimdShImm));
+            SetA64("0101111101xxxxxx010101xxxxxxxxxx", AInstEmit.Shl_S,         typeof(AOpCodeSimdShImm));
+            SetA64("0x00111100>>>xxx010101xxxxxxxxxx", AInstEmit.Shl_V,         typeof(AOpCodeSimdShImm));
+            SetA64("0100111101xxxxxx010101xxxxxxxxxx", AInstEmit.Shl_V,         typeof(AOpCodeSimdShImm));
             SetA64("0x101110<<100001001110xxxxxxxxxx", AInstEmit.Shll_V,        typeof(AOpCodeSimd));
             SetA64("0x00111100>>>xxx100001xxxxxxxxxx", AInstEmit.Shrn_V,        typeof(AOpCodeSimdShImm));
             SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", AInstEmit.Shsub_V,       typeof(AOpCodeSimdReg));
@@ -415,13 +417,18 @@ namespace ChocolArm64
             SetA64("01111110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_S,      typeof(AOpCodeSimd));
             SetA64("0x101110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_V,      typeof(AOpCodeSimd));
             SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", AInstEmit.Srhadd_V,      typeof(AOpCodeSimdReg));
+            SetA64("0101111101xxxxxx001001xxxxxxxxxx", AInstEmit.Srshr_S,       typeof(AOpCodeSimdShImm));
             SetA64("0x00111100>>>xxx001001xxxxxxxxxx", AInstEmit.Srshr_V,       typeof(AOpCodeSimdShImm));
             SetA64("0100111101xxxxxx001001xxxxxxxxxx", AInstEmit.Srshr_V,       typeof(AOpCodeSimdShImm));
+            SetA64("0101111101xxxxxx001101xxxxxxxxxx", AInstEmit.Srsra_S,       typeof(AOpCodeSimdShImm));
+            SetA64("0x00111100>>>xxx001101xxxxxxxxxx", AInstEmit.Srsra_V,       typeof(AOpCodeSimdShImm));
+            SetA64("0100111101xxxxxx001101xxxxxxxxxx", AInstEmit.Srsra_V,       typeof(AOpCodeSimdShImm));
             SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Sshl_V,        typeof(AOpCodeSimdReg));
             SetA64("0x00111100>>>xxx101001xxxxxxxxxx", AInstEmit.Sshll_V,       typeof(AOpCodeSimdShImm));
             SetA64("0101111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_S,        typeof(AOpCodeSimdShImm));
             SetA64("0x00111100>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V,        typeof(AOpCodeSimdShImm));
             SetA64("0100111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_V,        typeof(AOpCodeSimdShImm));
+            SetA64("0101111101xxxxxx000101xxxxxxxxxx", AInstEmit.Ssra_S,        typeof(AOpCodeSimdShImm));
             SetA64("0x00111100>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V,        typeof(AOpCodeSimdShImm));
             SetA64("0100111101xxxxxx000101xxxxxxxxxx", AInstEmit.Ssra_V,        typeof(AOpCodeSimdShImm));
             SetA64("0x001110<<1xxxxx001000xxxxxxxxxx", AInstEmit.Ssubl_V,       typeof(AOpCodeSimdReg));
@@ -474,6 +481,12 @@ namespace ChocolArm64
             SetA64("01111110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_S,       typeof(AOpCodeSimd));
             SetA64("0x101110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_V,       typeof(AOpCodeSimd));
             SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", AInstEmit.Urhadd_V,      typeof(AOpCodeSimdReg));
+            SetA64("0111111101xxxxxx001001xxxxxxxxxx", AInstEmit.Urshr_S,       typeof(AOpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx001001xxxxxxxxxx", AInstEmit.Urshr_V,       typeof(AOpCodeSimdShImm));
+            SetA64("0110111101xxxxxx001001xxxxxxxxxx", AInstEmit.Urshr_V,       typeof(AOpCodeSimdShImm));
+            SetA64("0111111101xxxxxx001101xxxxxxxxxx", AInstEmit.Ursra_S,       typeof(AOpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx001101xxxxxxxxxx", AInstEmit.Ursra_V,       typeof(AOpCodeSimdShImm));
+            SetA64("0110111101xxxxxx001101xxxxxxxxxx", AInstEmit.Ursra_V,       typeof(AOpCodeSimdShImm));
             SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Ushl_V,        typeof(AOpCodeSimdReg));
             SetA64("0x10111100>>>xxx101001xxxxxxxxxx", AInstEmit.Ushll_V,       typeof(AOpCodeSimdShImm));
             SetA64("0111111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_S,        typeof(AOpCodeSimdShImm));
@@ -481,6 +494,7 @@ namespace ChocolArm64
             SetA64("0110111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_V,        typeof(AOpCodeSimdShImm));
             SetA64("01111110xx100000001110xxxxxxxxxx", AInstEmit.Usqadd_S,      typeof(AOpCodeSimd));
             SetA64("0>101110<<100000001110xxxxxxxxxx", AInstEmit.Usqadd_V,      typeof(AOpCodeSimd));
+            SetA64("0111111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_S,        typeof(AOpCodeSimdShImm));
             SetA64("0x10111100>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V,        typeof(AOpCodeSimdShImm));
             SetA64("0110111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_V,        typeof(AOpCodeSimdShImm));
             SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", AInstEmit.Usubl_V,       typeof(AOpCodeSimdReg));
diff --git a/ChocolArm64/Decoder/AOpCodeSimdShImm.cs b/ChocolArm64/Decoder/AOpCodeSimdShImm.cs
index 6c8398817c..e6d5210f2f 100644
--- a/ChocolArm64/Decoder/AOpCodeSimdShImm.cs
+++ b/ChocolArm64/Decoder/AOpCodeSimdShImm.cs
@@ -10,7 +10,7 @@ namespace ChocolArm64.Decoder
         {
             Imm = (OpCode >> 16) & 0x7f;
 
-            Size = ABitUtils.HighestBitSet32(Imm >> 3);
+            Size = ABitUtils.HighestBitSetNibble(Imm >> 3);
         }
     }
-}
\ No newline at end of file
+}
diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index a291a7e51d..b9aedd07b3 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -957,6 +957,15 @@ namespace ChocolArm64.Instruction
             });
         }
 
+        public static void Mls_Ve(AILEmitterCtx Context) // Emitter for MLS by element (multiply, then subtract).
+        {
+            EmitVectorTernaryOpByElemZx(Context, () => // The callback supplies the per-element arithmetic; operand loading is left to the helper.
+            {
+                Context.Emit(OpCodes.Mul); // Multiply the two values on top of the IL stack.
+                Context.Emit(OpCodes.Sub); // Subtract that product from the value beneath it (presumably the Rd element - confirm in the helper).
+            });
+        }
+
         public static void Mul_V(AILEmitterCtx Context)
         {
             EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Mul));
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
index 4ecfdae30f..cb884c1ac8 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@@ -626,6 +626,9 @@ namespace ChocolArm64.Instruction
             int Bytes = Op.GetBitsCount() >> 3;
             int Elems = Bytes >> Op.Size;
 
+            EmitVectorExtract(Context, Op.Rm, Elem, Op.Size, Signed);
+            Context.EmitSttmp();
+
             for (int Index = 0; Index < Elems; Index++)
             {
                 if (Ternary)
@@ -634,7 +637,7 @@ namespace ChocolArm64.Instruction
                 }
 
                 EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed);
-                EmitVectorExtract(Context, Op.Rm, Elem,  Op.Size, Signed);
+                Context.EmitLdtmp();
 
                 Emit();
 
diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs
index 6f6b56068e..4dee53b9b1 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdShift.cs
@@ -14,20 +14,24 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size);
+            EmitScalarUnaryOpZx(Context, () =>
+            {
+                Context.EmitLdc_I4(GetImmShl(Op));
 
-            Context.EmitLdc_I4(GetImmShl(Op));
-
-            Context.Emit(OpCodes.Shl);
-
-            EmitScalarSet(Context, Op.Rd, Op.Size);
+                Context.Emit(OpCodes.Shl);
+            });
         }
 
         public static void Shl_V(AILEmitterCtx Context)
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op));
+            EmitVectorUnaryOpZx(Context, () =>
+            {
+                Context.EmitLdc_I4(GetImmShl(Op));
+
+                Context.Emit(OpCodes.Shl);
+            });
         }
 
         public static void Shll_V(AILEmitterCtx Context)
@@ -103,15 +107,24 @@ namespace ChocolArm64.Instruction
             EmitVectorSaturatingNarrowOpSxSx(Context, Emit);
         }
 
+        public static void Srshr_S(AILEmitterCtx Context) // Scalar SRSHR: signed shift right by immediate, with rounding.
+        {
+            EmitScalarShrImmOpSx(Context, ShrImmFlags.Round); // The helper ORs in the Scalar and Signed flags.
+        }
+
         public static void Srshr_V(AILEmitterCtx Context)
         {
-            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+            EmitVectorShrImmOpSx(Context, ShrImmFlags.Round);
+        }
 
-            int Shift = GetImmShr(Op);
+        public static void Srsra_S(AILEmitterCtx Context)
+        {
+            EmitScalarShrImmOpSx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+        }
 
-            long RoundConst = 1L << (Shift - 1);
-
-            EmitVectorRoundShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift, RoundConst);
+        public static void Srsra_V(AILEmitterCtx Context)
+        {
+            EmitVectorShrImmOpSx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
         }
 
         public static void Sshl_V(AILEmitterCtx Context)
@@ -128,35 +141,42 @@ namespace ChocolArm64.Instruction
 
         public static void Sshr_S(AILEmitterCtx Context)
         {
-            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
-
-            EmitVectorExtractSx(Context, Op.Rn, 0, Op.Size);
-
-            Context.EmitLdc_I4(GetImmShr(Op));
-
-            Context.Emit(OpCodes.Shr);
-
-            EmitScalarSet(Context, Op.Rd, Op.Size);
+            EmitShrImmOp(Context, ShrImmFlags.ScalarSx);
         }
 
         public static void Sshr_V(AILEmitterCtx Context)
         {
-            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+            EmitShrImmOp(Context, ShrImmFlags.VectorSx);
+        }
 
-            EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), GetImmShr(Op));
+        public static void Ssra_S(AILEmitterCtx Context)
+        {
+            EmitScalarShrImmOpSx(Context, ShrImmFlags.Accumulate);
         }
 
         public static void Ssra_V(AILEmitterCtx Context)
         {
-            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+            EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate);
+        }
 
-            Action Emit = () =>
-            {
-                Context.Emit(OpCodes.Shr);
-                Context.Emit(OpCodes.Add);
-            };
+        public static void Urshr_S(AILEmitterCtx Context)
+        {
+            EmitScalarShrImmOpZx(Context, ShrImmFlags.Round);
+        }
 
-            EmitVectorShImmTernarySx(Context, Emit, GetImmShr(Op));
+        public static void Urshr_V(AILEmitterCtx Context)
+        {
+            EmitVectorShrImmOpZx(Context, ShrImmFlags.Round);
+        }
+
+        public static void Ursra_S(AILEmitterCtx Context)
+        {
+            EmitScalarShrImmOpZx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+        }
+
+        public static void Ursra_V(AILEmitterCtx Context)
+        {
+            EmitVectorShrImmOpZx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
         }
 
         public static void Ushl_V(AILEmitterCtx Context)
@@ -173,41 +193,22 @@ namespace ChocolArm64.Instruction
 
         public static void Ushr_S(AILEmitterCtx Context)
         {
-            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
-
-            EmitScalarUnaryOpZx(Context, () =>
-            {
-                Context.EmitLdc_I4(GetImmShr(Op));
-
-                Context.Emit(OpCodes.Shr_Un);
-            });
+            EmitShrImmOp(Context, ShrImmFlags.ScalarZx);
         }
 
         public static void Ushr_V(AILEmitterCtx Context)
         {
-            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+            EmitShrImmOp(Context, ShrImmFlags.VectorZx);
+        }
 
-            EmitVectorUnaryOpZx(Context, () =>
-            {
-                Context.EmitLdc_I4(GetImmShr(Op));
-
-                Context.Emit(OpCodes.Shr_Un);
-            });
+        public static void Usra_S(AILEmitterCtx Context)
+        {
+            EmitScalarShrImmOpZx(Context, ShrImmFlags.Accumulate);
         }
 
         public static void Usra_V(AILEmitterCtx Context)
         {
-            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
-
-            Action Emit = () =>
-            {
-                Context.EmitLdc_I4(GetImmShr(Op));
-
-                Context.Emit(OpCodes.Shr_Un);
-                Context.Emit(OpCodes.Add);
-            };
-
-            EmitVectorOp(Context, Emit, OperFlags.RdRn, Signed: false);
+            EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate);
         }
 
         private static void EmitVectorShl(AILEmitterCtx Context, bool Signed)
@@ -274,78 +275,118 @@ namespace ChocolArm64.Instruction
         }
 
         [Flags]
-        private enum ShImmFlags
+        private enum ShrImmFlags
         {
-            None = 0,
+            Scalar = 1 << 0,
+            Signed = 1 << 1,
 
-            Signed  = 1 << 0,
-            Ternary = 1 << 1,
-            Rounded = 1 << 2,
+            Round      = 1 << 2,
+            Accumulate = 1 << 3,
 
-            SignedTernary = Signed | Ternary,
-            SignedRounded = Signed | Rounded
+            ScalarSx = Scalar | Signed,
+            ScalarZx = Scalar,
+
+            VectorSx = Signed,
+            VectorZx = 0
         }
 
-        private static void EmitVectorShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm)
+        private static void EmitScalarShrImmOpSx(AILEmitterCtx Context, ShrImmFlags Flags)
         {
-            EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.Signed);
+            EmitShrImmOp(Context, ShrImmFlags.ScalarSx | Flags);
         }
 
-        private static void EmitVectorShImmTernarySx(AILEmitterCtx Context, Action Emit, int Imm)
+        private static void EmitScalarShrImmOpZx(AILEmitterCtx Context, ShrImmFlags Flags)
         {
-            EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedTernary);
+            EmitShrImmOp(Context, ShrImmFlags.ScalarZx | Flags);
         }
 
-        private static void EmitVectorShImmBinaryZx(AILEmitterCtx Context, Action Emit, int Imm)
+        private static void EmitVectorShrImmOpSx(AILEmitterCtx Context, ShrImmFlags Flags)
         {
-            EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.None);
+            EmitShrImmOp(Context, ShrImmFlags.VectorSx | Flags);
         }
 
-        private static void EmitVectorRoundShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm, long Rc)
+        private static void EmitVectorShrImmOpZx(AILEmitterCtx Context, ShrImmFlags Flags)
         {
-            EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedRounded, Rc);
+            EmitShrImmOp(Context, ShrImmFlags.VectorZx | Flags);
         }
 
-        private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, ShImmFlags Flags, long Rc = 0)
+        private static void EmitShrImmOp(AILEmitterCtx Context, ShrImmFlags Flags)
         {
-            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+
+            bool Scalar     = (Flags & ShrImmFlags.Scalar)     != 0;
+            bool Signed     = (Flags & ShrImmFlags.Signed)     != 0;
+            bool Round      = (Flags & ShrImmFlags.Round)      != 0;
+            bool Accumulate = (Flags & ShrImmFlags.Accumulate) != 0;
+
+            int Shift = GetImmShr(Op);
+
+            long RoundConst = 1L << (Shift - 1);
 
             int Bytes = Op.GetBitsCount() >> 3;
-            int Elems = Bytes >> Op.Size;
-
-            bool Signed  = (Flags & ShImmFlags.Signed)  != 0;
-            bool Ternary = (Flags & ShImmFlags.Ternary) != 0;
-            bool Rounded = (Flags & ShImmFlags.Rounded) != 0;
+            int Elems = !Scalar ? Bytes >> Op.Size : 1;
 
             for (int Index = 0; Index < Elems; Index++)
             {
-                if (Ternary)
-                {
-                    EmitVectorExtract(Context, Op.Rd, Index, Op.Size, Signed);
-                }
-
                 EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed);
 
-                if (Rounded)
+                if (Op.Size <= 2)
                 {
-                    Context.EmitLdc_I8(Rc);
+                    if (Round)
+                    {
+                        Context.EmitLdc_I8(RoundConst);
+
+                        Context.Emit(OpCodes.Add);
+                    }
+
+                    Context.EmitLdc_I4(Shift);
+
+                    Context.Emit(Signed ? OpCodes.Shr : OpCodes.Shr_Un);
+                }
+                else /* if (Op.Size == 3) */
+                {
+                    EmitShrImm_64(Context, Signed, Round ? RoundConst : 0L, Shift);
+                }
+
+                if (Accumulate)
+                {
+                    EmitVectorExtract(Context, Op.Rd, Index, Op.Size, Signed);
 
                     Context.Emit(OpCodes.Add);
                 }
 
-                Context.EmitLdc_I4(Imm);
-
-                Emit();
-
-                EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
+                EmitVectorInsertTmp(Context, Index, Op.Size);
             }
 
-            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            Context.EmitLdvectmp();
+            Context.EmitStvec(Op.Rd);
+
+            if ((Op.RegisterSize == ARegisterSize.SIMD64) || Scalar)
             {
                 EmitVectorZeroUpper(Context, Op.Rd);
             }
         }
 
+        // Dst_64 = (Int(Src_64, Signed) + RoundConst) >> Shift;
+        private static void EmitShrImm_64( // Emits a call to the matching ASoftFallback 64-bit shift-right routine.
+            AILEmitterCtx Context,
+            bool Signed, // true selects SignedShrImm_64, false selects UnsignedShrImm_64.
+            long RoundConst, // Rounding constant forwarded to the fallback; 0 means no rounding.
+            int  Shift) // Shift amount forwarded to the fallback.
+        {
+            if (((AOpCodeSimd)Context.CurrOp).Size < 3) // Rejects every element size below 3.
+            {
+                throw new InvalidOperationException();
+            }
+
+            Context.EmitLdc_I8(RoundConst); // Second call argument; the source value is expected to be on the IL stack already.
+            Context.EmitLdc_I4(Shift); // Third call argument.
+
+            ASoftFallback.EmitCall(Context, Signed
+                ? nameof(ASoftFallback.SignedShrImm_64)
+                : nameof(ASoftFallback.UnsignedShrImm_64));
+        }
+
         private static void EmitVectorShImmNarrowBinarySx(AILEmitterCtx Context, Action Emit, int Imm)
         {
             EmitVectorShImmNarrowBinaryOp(Context, Emit, Imm, true);
@@ -414,4 +455,4 @@ namespace ChocolArm64.Instruction
             Context.EmitStvec(Op.Rd);
         }
     }
-}
\ No newline at end of file
+}
diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs
index 0ae84ab2d4..a7bc108591 100644
--- a/ChocolArm64/Instruction/ASoftFallback.cs
+++ b/ChocolArm64/Instruction/ASoftFallback.cs
@@ -16,6 +16,92 @@ namespace ChocolArm64.Instruction
             Context.EmitCall(typeof(ASoftFallback), MthdName);
         }
 
+#region "ShrImm_64"
+        public static long SignedShrImm_64(long Value, long RoundConst, int Shift) // Signed 64-bit shift right; RoundConst == 0 selects the non-rounding path.
+        {
+            if (RoundConst == 0L)
+            {
+                if (Shift <= 63)
+                {
+                    return Value >> Shift; // Arithmetic shift, since Value is a signed long.
+                }
+                else /* if (Shift == 64) */
+                {
+                    if (Value < 0L) // Handled explicitly: every result bit is a copy of the sign bit.
+                    {
+                        return -1L;
+                    }
+                    else
+                    {
+                        return 0L;
+                    }
+                }
+            }
+            else /* if (RoundConst == 1L << (Shift - 1)) */
+            {
+                if (Shift <= 63)
+                {
+                    long Add = Value + RoundConst; // Can wrap around (assuming an unchecked context); detected just below.
+
+                    if ((~Value & (Value ^ Add)) < 0L) // Sign bit is set only when Value >= 0 and Add < 0, i.e. the sum wrapped.
+                    {
+                        return (long)((ulong)Add >> Shift); // Logical shift: the wrapped sum is read as an unsigned magnitude.
+                    }
+                    else
+                    {
+                        return Add >> Shift;
+                    }
+                }
+                else /* if (Shift == 64) */
+                {
+                    return 0L; // A rounded shift by 64 always yields zero on this path.
+                }
+            }
+        }
+
+        public static ulong UnsignedShrImm_64(ulong Value, long RoundConst, int Shift) // Unsigned 64-bit shift right; RoundConst == 0 selects the non-rounding path.
+        {
+            if (RoundConst == 0L)
+            {
+                if (Shift <= 63)
+                {
+                    return Value >> Shift; // Logical shift, since Value is a ulong.
+                }
+                else /* if (Shift == 64) */
+                {
+                    return 0UL; // Handled explicitly: all 64 bits are shifted out.
+                }
+            }
+            else /* if (RoundConst == 1L << (Shift - 1)) */
+            {
+                ulong Add = Value + (ulong)RoundConst; // Can wrap around (assuming an unchecked context); detected just below.
+
+                if ((Add < Value) && (Add < (ulong)RoundConst)) // Sum smaller than both operands: a carry out of bit 63 was lost.
+                {
+                    if (Shift <= 63)
+                    {
+                        return (Add >> Shift) | (0x8000000000000000UL >> (Shift - 1)); // Re-inserts the lost carry (bit 64) at its post-shift position.
+                    }
+                    else /* if (Shift == 64) */
+                    {
+                        return 1UL; // Only the carry bit survives a shift by 64.
+                    }
+                }
+                else
+                {
+                    if (Shift <= 63)
+                    {
+                        return Add >> Shift;
+                    }
+                    else /* if (Shift == 64) */
+                    {
+                        return 0UL; // No carry, and every bit of the sum is shifted out.
+                    }
+                }
+            }
+        }
+#endregion
+
 #region "Saturating"
         public static long SignedSrcSignedDstSatQ(long op, int Size, AThreadState State)
         {
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs
new file mode 100644
index 0000000000..4d14ab4859
--- /dev/null
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs
@@ -0,0 +1,143 @@
+#define SimdRegElem
+
+using ChocolArm64.State;
+
+using NUnit.Framework;
+
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Tests.Cpu
+{
+    [Category("SimdRegElem")] // Tested: second half of 2018.
+    public sealed class CpuTestSimdRegElem : CpuTest // Tests for the by-element MLA and MLS SIMD opcodes.
+    {
+#if SimdRegElem
+
+#region "ValueSource"
+        private static ulong[] _2S_() // Fixed inputs: per 32-bit lane zero, 0x7FFFFFFF, 0x80000000 and all ones.
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFF7FFFFFFFul,
+                                 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul };
+        }
+
+        private static ulong[] _4H_() // Fixed inputs: per 16-bit lane zero, 0x7FFF, 0x8000 and all ones.
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul,
+                                 0x8000800080008000ul, 0xFFFFFFFFFFFFFFFFul };
+        }
+#endregion
+
+        private const int RndCnt = 2; // Count handed to every [Random] attribute below.
+
+        [Test, Pairwise, Description("MLA <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>]")]
+        public void Mla_Ve_4H_8H([Values(0u)]     uint Rd,
+                                 [Values(1u, 0u)] uint Rn,
+                                 [Values(2u, 0u)] uint Rm,
+                                 [ValueSource("_4H_")] [Random(RndCnt)] ulong Z,
+                                 [ValueSource("_4H_")] [Random(RndCnt)] ulong A,
+                                 [ValueSource("_4H_")] [Random(RndCnt)] ulong B,
+                                 [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint Index,
+                                 [Values(0b0u, 0b1u)] uint Q) // <4H, 8H>
+        {
+            uint H = (Index & 4) >> 2; // Index bit 2, placed at opcode bit 11 below.
+            uint L = (Index & 2) >> 1; // Index bit 1, placed at opcode bit 21 below.
+            uint M = (Index & 1) >> 0; // Index bit 0, placed at opcode bit 20 below.
+
+            uint Opcode = 0x2F400000; // MLA V0.4H, V0.4H, V0.H[0]
+            Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm is masked to 4 bits; bit 20 is used for M.
+            Opcode |= (L << 21) | (M << 20) | (H << 11);
+            Opcode |= ((Q & 1) << 30);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A * Q); // Second argument is A when Q == 1, otherwise 0.
+            Vector128<float> V2 = MakeVectorE0E1(B, B * H); // Second argument is B when H == 1, otherwise 0.
+
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("MLA <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>]")]
+        public void Mla_Ve_2S_4S([Values(0u)]     uint Rd,
+                                 [Values(1u, 0u)] uint Rn,
+                                 [Values(2u, 0u)] uint Rm,
+                                 [ValueSource("_2S_")] [Random(RndCnt)] ulong Z,
+                                 [ValueSource("_2S_")] [Random(RndCnt)] ulong A,
+                                 [ValueSource("_2S_")] [Random(RndCnt)] ulong B,
+                                 [Values(0u, 1u, 2u, 3u)] uint Index,
+                                 [Values(0b0u, 0b1u)] uint Q) // <2S, 4S>
+        {
+            uint H = (Index & 2) >> 1; // Index bit 1, placed at opcode bit 11 below.
+            uint L = (Index & 1) >> 0; // Index bit 0, placed at opcode bit 21 below.
+
+            uint Opcode = 0x2F800000; // MLA V0.2S, V0.2S, V0.S[0]
+            Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm is masked to 4 bits here as well.
+            Opcode |= (L << 21) | (H << 11);
+            Opcode |= ((Q & 1) << 30);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A * Q); // Second argument is A when Q == 1, otherwise 0.
+            Vector128<float> V2 = MakeVectorE0E1(B, B * H); // Second argument is B when H == 1, otherwise 0.
+
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("MLS <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>]")]
+        public void Mls_Ve_4H_8H([Values(0u)]     uint Rd,
+                                 [Values(1u, 0u)] uint Rn,
+                                 [Values(2u, 0u)] uint Rm,
+                                 [ValueSource("_4H_")] [Random(RndCnt)] ulong Z,
+                                 [ValueSource("_4H_")] [Random(RndCnt)] ulong A,
+                                 [ValueSource("_4H_")] [Random(RndCnt)] ulong B,
+                                 [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint Index,
+                                 [Values(0b0u, 0b1u)] uint Q) // <4H, 8H>
+        {
+            uint H = (Index & 4) >> 2; // Index bit 2, placed at opcode bit 11 below.
+            uint L = (Index & 2) >> 1; // Index bit 1, placed at opcode bit 21 below.
+            uint M = (Index & 1) >> 0; // Index bit 0, placed at opcode bit 20 below.
+
+            uint Opcode = 0x2F404000; // MLS V0.4H, V0.4H, V0.H[0]
+            Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm is masked to 4 bits; bit 20 is used for M.
+            Opcode |= (L << 21) | (M << 20) | (H << 11);
+            Opcode |= ((Q & 1) << 30);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A * Q); // Second argument is A when Q == 1, otherwise 0.
+            Vector128<float> V2 = MakeVectorE0E1(B, B * H); // Second argument is B when H == 1, otherwise 0.
+
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("MLS <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>]")]
+        public void Mls_Ve_2S_4S([Values(0u)]     uint Rd,
+                                 [Values(1u, 0u)] uint Rn,
+                                 [Values(2u, 0u)] uint Rm,
+                                 [ValueSource("_2S_")] [Random(RndCnt)] ulong Z,
+                                 [ValueSource("_2S_")] [Random(RndCnt)] ulong A,
+                                 [ValueSource("_2S_")] [Random(RndCnt)] ulong B,
+                                 [Values(0u, 1u, 2u, 3u)] uint Index,
+                                 [Values(0b0u, 0b1u)] uint Q) // <2S, 4S>
+        {
+            uint H = (Index & 2) >> 1; // Index bit 1, placed at opcode bit 11 below.
+            uint L = (Index & 1) >> 0; // Index bit 0, placed at opcode bit 21 below.
+
+            uint Opcode = 0x2F804000; // MLS V0.2S, V0.2S, V0.S[0]
+            Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm is masked to 4 bits here as well.
+            Opcode |= (L << 21) | (H << 11);
+            Opcode |= ((Q & 1) << 30);
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> V1 = MakeVectorE0E1(A, A * Q); // Second argument is A when Q == 1, otherwise 0.
+            Vector128<float> V2 = MakeVectorE0E1(B, B * H); // Second argument is B when H == 1, otherwise 0.
+
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+
+            CompareAgainstUnicorn();
+        }
+#endif
+    }
+}
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs
new file mode 100644
index 0000000000..7728522265
--- /dev/null
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs
@@ -0,0 +1,344 @@
+#define SimdShImm
+
+using ChocolArm64.State;
+
+using NUnit.Framework;
+
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Tests.Cpu
+{
+    [Category("SimdShImm")] // Tested: second half of 2018.
+    public sealed class CpuTestSimdShImm : CpuTest
+    {
+#if SimdShImm
+
+#region "ValueSource (Types)"
+        private static ulong[] _1D_() => new[] // Boundary bit patterns for one 64-bit lane.
+        {
+            0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, // all zeros, only the top bit clear
+            0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul  // only the top bit set, all ones
+        };
+
+        private static ulong[] _2S_() => new[] // Same boundary pattern repeated in both 32-bit lanes.
+        {
+            0x00000000_00000000ul, 0x7FFFFFFF_7FFFFFFFul, // all zeros, top bit of each lane clear
+            0x80000000_80000000ul, 0xFFFFFFFF_FFFFFFFFul  // top bit of each lane set, all ones
+        };
+
+        private static ulong[] _4H_() => new[] // Same boundary pattern repeated in all four 16-bit lanes.
+        {
+            0x0000_0000_0000_0000ul, 0x7FFF_7FFF_7FFF_7FFFul, // all zeros, top bit of each lane clear
+            0x8000_8000_8000_8000ul, 0xFFFF_FFFF_FFFF_FFFFul  // top bit of each lane set, all ones
+        };
+
+        private static ulong[] _8B_() => new[] // Same boundary pattern repeated in all eight byte lanes.
+        {
+            0x00_00_00_00_00_00_00_00ul, 0x7F_7F_7F_7F_7F_7F_7F_7Ful, // all zeros, top bit of each byte clear
+            0x80_80_80_80_80_80_80_80ul, 0xFF_FF_FF_FF_FF_FF_FF_FFul  // top bit of each byte set, all ones
+        };
+#endregion
+
+#region "ValueSource (Opcodes)"
+        // Base encodings for the scalar (D register) shift-right-by-immediate instructions.
+        // Every entry uses Rd = Rn = 0; ShrImm_S_D ORs in the register numbers and the shift field.
+        // Listed as the signed (S*) variants first, then their unsigned (U*) counterparts.
+        private static uint[] _ShrImm_S_D_() => new[]
+        {
+            0x5F40_2400u, // SRSHR D0, D0, #64
+            0x5F40_3400u, // SRSRA D0, D0, #64
+            0x5F40_0400u, // SSHR  D0, D0, #64
+            0x5F40_1400u, // SSRA  D0, D0, #64
+            0x7F40_2400u, // URSHR D0, D0, #64
+            0x7F40_3400u, // URSRA D0, D0, #64
+            0x7F40_0400u, // USHR  D0, D0, #64
+            0x7F40_1400u  // USRA  D0, D0, #64
+        };
+
+        // Base encodings for the vector shift-right-by-immediate instructions on byte lanes.
+        // Every entry uses Rd = Rn = 0; ShrImm_V_8B_16B ORs in registers, the shift field and Q.
+        // Listed as the signed (S*) variants first, then their unsigned (U*) counterparts.
+        private static uint[] _ShrImm_V_8B_16B_() => new[]
+        {
+            0x0F08_2400u, // SRSHR V0.8B, V0.8B, #8
+            0x0F08_3400u, // SRSRA V0.8B, V0.8B, #8
+            0x0F08_0400u, // SSHR  V0.8B, V0.8B, #8
+            0x0F08_1400u, // SSRA  V0.8B, V0.8B, #8
+            0x2F08_2400u, // URSHR V0.8B, V0.8B, #8
+            0x2F08_3400u, // URSRA V0.8B, V0.8B, #8
+            0x2F08_0400u, // USHR  V0.8B, V0.8B, #8
+            0x2F08_1400u  // USRA  V0.8B, V0.8B, #8
+        };
+
+        // Base encodings for the vector shift-right-by-immediate instructions on 16-bit lanes.
+        // Every entry uses Rd = Rn = 0; ShrImm_V_4H_8H ORs in registers, the shift field and Q.
+        // Listed as the signed (S*) variants first, then their unsigned (U*) counterparts.
+        private static uint[] _ShrImm_V_4H_8H_() => new[]
+        {
+            0x0F10_2400u, // SRSHR V0.4H, V0.4H, #16
+            0x0F10_3400u, // SRSRA V0.4H, V0.4H, #16
+            0x0F10_0400u, // SSHR  V0.4H, V0.4H, #16
+            0x0F10_1400u, // SSRA  V0.4H, V0.4H, #16
+            0x2F10_2400u, // URSHR V0.4H, V0.4H, #16
+            0x2F10_3400u, // URSRA V0.4H, V0.4H, #16
+            0x2F10_0400u, // USHR  V0.4H, V0.4H, #16
+            0x2F10_1400u  // USRA  V0.4H, V0.4H, #16
+        };
+
+        // Base encodings for the vector shift-right-by-immediate instructions on 32-bit lanes.
+        // Every entry uses Rd = Rn = 0; ShrImm_V_2S_4S ORs in registers, the shift field and Q.
+        // Listed as the signed (S*) variants first, then their unsigned (U*) counterparts.
+        private static uint[] _ShrImm_V_2S_4S_() => new[]
+        {
+            0x0F20_2400u, // SRSHR V0.2S, V0.2S, #32
+            0x0F20_3400u, // SRSRA V0.2S, V0.2S, #32
+            0x0F20_0400u, // SSHR  V0.2S, V0.2S, #32
+            0x0F20_1400u, // SSRA  V0.2S, V0.2S, #32
+            0x2F20_2400u, // URSHR V0.2S, V0.2S, #32
+            0x2F20_3400u, // URSRA V0.2S, V0.2S, #32
+            0x2F20_0400u, // USHR  V0.2S, V0.2S, #32
+            0x2F20_1400u  // USRA  V0.2S, V0.2S, #32
+        };
+
+        // Base encodings for the vector shift-right-by-immediate instructions on 64-bit lanes.
+        // Every entry uses Rd = Rn = 0; ShrImm_V_2D ORs in the register numbers and the shift field.
+        // Listed as the signed (S*) variants first, then their unsigned (U*) counterparts.
+        private static uint[] _ShrImm_V_2D_() => new[]
+        {
+            0x4F40_2400u, // SRSHR V0.2D, V0.2D, #64
+            0x4F40_3400u, // SRSRA V0.2D, V0.2D, #64
+            0x4F40_0400u, // SSHR  V0.2D, V0.2D, #64
+            0x4F40_1400u, // SSRA  V0.2D, V0.2D, #64
+            0x6F40_2400u, // URSHR V0.2D, V0.2D, #64
+            0x6F40_3400u, // URSRA V0.2D, V0.2D, #64
+            0x6F40_0400u, // USHR  V0.2D, V0.2D, #64
+            0x6F40_1400u  // USRA  V0.2D, V0.2D, #64
+        };
+#endregion
+
+        private const int RndCnt = 2; // Count argument passed to every [Random(...)] parameter attribute in this fixture.
+
+        [Test, Pairwise, Description("SHL <V><d>, <V><n>, #<shift>")]
+        public void Shl_S_D([Values(0u)]     uint Rd,
+                            [Values(1u, 0u)] uint Rn,
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong Z,
+                            [ValueSource("_1D_")] [Random(RndCnt)] ulong A,
+                            [Range(0u, 63u)] uint Shift)
+        {
+            // The 7-bit shift field (opcode bits 16-22) holds 64 + Shift.
+            uint ShiftField = (Shift + 64) & 0x7F;
+            uint RegField   = ((Rn & 31) << 5) | (Rd & 31);
+
+            const uint BaseOpcode = 0x5F405400; // SHL D0, D0, #0
+            uint Opcode = BaseOpcode | RegField | (ShiftField << 16);
+
+            // V0 starts with Z in both elements; V1 is built from A alone.
+            Vector128<float> InitV0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> InitV1 = MakeVectorE0(A);
+            SingleOpcode(Opcode, V0: InitV0, V1: InitV1);
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("SHL <Vd>.<T>, <Vn>.<T>, #<shift>")]
+        public void Shl_V_8B_16B([Values(0u)]     uint Rd,
+                                 [Values(1u, 0u)] uint Rn,
+                                 [ValueSource("_8B_")] [Random(RndCnt)] ulong Z,
+                                 [ValueSource("_8B_")] [Random(RndCnt)] ulong A,
+                                 [Range(0u, 7u)] uint Shift,
+                                 [Values(0b0u, 0b1u)] uint Q) // <8B, 16B>
+        {
+            // The 7-bit shift field (opcode bits 16-22) holds 8 + Shift; Q lands in bit 30.
+            uint ShiftField = (Shift + 8) & 0x7F;
+            uint RegField   = ((Rn & 31) << 5) | (Rd & 31);
+            uint SizeField  = (Q & 1) << 30;
+
+            const uint BaseOpcode = 0x0F085400; // SHL V0.8B, V0.8B, #0
+            uint Opcode = BaseOpcode | RegField | (ShiftField << 16) | SizeField;
+
+            // V0 starts with Z in both elements; the second element of V1 is zero when Q is 0.
+            Vector128<float> InitV0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> InitV1 = MakeVectorE0E1(A, A * Q);
+            SingleOpcode(Opcode, V0: InitV0, V1: InitV1);
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("SHL <Vd>.<T>, <Vn>.<T>, #<shift>")]
+        public void Shl_V_4H_8H([Values(0u)]     uint Rd,
+                                [Values(1u, 0u)] uint Rn,
+                                [ValueSource("_4H_")] [Random(RndCnt)] ulong Z,
+                                [ValueSource("_4H_")] [Random(RndCnt)] ulong A,
+                                [Range(0u, 15u)] uint Shift,
+                                [Values(0b0u, 0b1u)] uint Q) // <4H, 8H>
+        {
+            // The 7-bit shift field (opcode bits 16-22) holds 16 + Shift; Q lands in bit 30.
+            uint ShiftField = (Shift + 16) & 0x7F;
+            uint RegField   = ((Rn & 31) << 5) | (Rd & 31);
+            uint SizeField  = (Q & 1) << 30;
+
+            const uint BaseOpcode = 0x0F105400; // SHL V0.4H, V0.4H, #0
+            uint Opcode = BaseOpcode | RegField | (ShiftField << 16) | SizeField;
+
+            // V0 starts with Z in both elements; the second element of V1 is zero when Q is 0.
+            Vector128<float> InitV0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> InitV1 = MakeVectorE0E1(A, A * Q);
+            SingleOpcode(Opcode, V0: InitV0, V1: InitV1);
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("SHL <Vd>.<T>, <Vn>.<T>, #<shift>")]
+        public void Shl_V_2S_4S([Values(0u)]     uint Rd,
+                                [Values(1u, 0u)] uint Rn,
+                                [ValueSource("_2S_")] [Random(RndCnt)] ulong Z,
+                                [ValueSource("_2S_")] [Random(RndCnt)] ulong A,
+                                [Range(0u, 31u)] uint Shift,
+                                [Values(0b0u, 0b1u)] uint Q) // <2S, 4S>
+        {
+            // The 7-bit shift field (opcode bits 16-22) holds 32 + Shift; Q lands in bit 30.
+            uint ShiftField = (Shift + 32) & 0x7F;
+            uint RegField   = ((Rn & 31) << 5) | (Rd & 31);
+            uint SizeField  = (Q & 1) << 30;
+
+            const uint BaseOpcode = 0x0F205400; // SHL V0.2S, V0.2S, #0
+            uint Opcode = BaseOpcode | RegField | (ShiftField << 16) | SizeField;
+
+            // V0 starts with Z in both elements; the second element of V1 is zero when Q is 0.
+            Vector128<float> InitV0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> InitV1 = MakeVectorE0E1(A, A * Q);
+            SingleOpcode(Opcode, V0: InitV0, V1: InitV1);
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("SHL <Vd>.<T>, <Vn>.<T>, #<shift>")]
+        public void Shl_V_2D([Values(0u)]     uint Rd,
+                             [Values(1u, 0u)] uint Rn,
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong Z,
+                             [ValueSource("_1D_")] [Random(RndCnt)] ulong A,
+                             [Range(0u, 63u)] uint Shift)
+        {
+            // The 7-bit shift field (opcode bits 16-22) holds 64 + Shift.
+            uint ShiftField = (Shift + 64) & 0x7F;
+            uint RegField   = ((Rn & 31) << 5) | (Rd & 31);
+
+            const uint BaseOpcode = 0x4F405400; // SHL V0.2D, V0.2D, #0
+            uint Opcode = BaseOpcode | RegField | (ShiftField << 16);
+
+            // V0 starts with Z in both elements; V1 carries A in both elements.
+            Vector128<float> InitV0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> InitV1 = MakeVectorE0E1(A, A);
+            SingleOpcode(Opcode, V0: InitV0, V1: InitV1);
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise]
+        public void ShrImm_S_D([ValueSource("_ShrImm_S_D_")] uint Opcodes,
+                               [Values(0u)]     uint Rd,
+                               [Values(1u, 0u)] uint Rn,
+                               [ValueSource("_1D_")] [Random(RndCnt)] ulong Z,
+                               [ValueSource("_1D_")] [Random(RndCnt)] ulong A,
+                               [Range(1u, 64u)] uint Shift)
+        {
+            // The 7-bit shift field (opcode bits 16-22) holds 128 - Shift.
+            uint ShiftField = (128 - Shift) & 0x7F;
+            uint RegField   = ((Rn & 31) << 5) | (Rd & 31);
+            uint Opcode     = Opcodes | RegField | (ShiftField << 16);
+
+            // V0 starts with Z in both elements; V1 is built from A alone.
+            Vector128<float> InitV0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> InitV1 = MakeVectorE0(A);
+
+            SingleOpcode(Opcode, V0: InitV0, V1: InitV1);
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise]
+        public void ShrImm_V_8B_16B([ValueSource("_ShrImm_V_8B_16B_")] uint Opcodes,
+                                    [Values(0u)]     uint Rd,
+                                    [Values(1u, 0u)] uint Rn,
+                                    [ValueSource("_8B_")] [Random(RndCnt)] ulong Z,
+                                    [ValueSource("_8B_")] [Random(RndCnt)] ulong A,
+                                    [Range(1u, 8u)] uint Shift,
+                                    [Values(0b0u, 0b1u)] uint Q) // <8B, 16B>
+        {
+            // The 7-bit shift field (opcode bits 16-22) holds 16 - Shift; Q lands in bit 30.
+            uint ShiftField = (16 - Shift) & 0x7F;
+            uint RegField   = ((Rn & 31) << 5) | (Rd & 31);
+            uint SizeField  = (Q & 1) << 30;
+            uint Opcode     = Opcodes | RegField | (ShiftField << 16) | SizeField;
+
+            // V0 starts with Z in both elements; the second element of V1 is zero when Q is 0.
+            Vector128<float> InitV0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> InitV1 = MakeVectorE0E1(A, A * Q);
+
+            SingleOpcode(Opcode, V0: InitV0, V1: InitV1);
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise]
+        public void ShrImm_V_4H_8H([ValueSource("_ShrImm_V_4H_8H_")] uint Opcodes,
+                                   [Values(0u)]     uint Rd,
+                                   [Values(1u, 0u)] uint Rn,
+                                   [ValueSource("_4H_")] [Random(RndCnt)] ulong Z,
+                                   [ValueSource("_4H_")] [Random(RndCnt)] ulong A,
+                                   [Range(1u, 16u)] uint Shift,
+                                   [Values(0b0u, 0b1u)] uint Q) // <4H, 8H>
+        {
+            // The 7-bit shift field (opcode bits 16-22) holds 32 - Shift; Q lands in bit 30.
+            uint ShiftField = (32 - Shift) & 0x7F;
+            uint RegField   = ((Rn & 31) << 5) | (Rd & 31);
+            uint SizeField  = (Q & 1) << 30;
+            uint Opcode     = Opcodes | RegField | (ShiftField << 16) | SizeField;
+
+            // V0 starts with Z in both elements; the second element of V1 is zero when Q is 0.
+            Vector128<float> InitV0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> InitV1 = MakeVectorE0E1(A, A * Q);
+
+            SingleOpcode(Opcode, V0: InitV0, V1: InitV1);
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise]
+        public void ShrImm_V_2S_4S([ValueSource("_ShrImm_V_2S_4S_")] uint Opcodes,
+                                   [Values(0u)]     uint Rd,
+                                   [Values(1u, 0u)] uint Rn,
+                                   [ValueSource("_2S_")] [Random(RndCnt)] ulong Z,
+                                   [ValueSource("_2S_")] [Random(RndCnt)] ulong A,
+                                   [Range(1u, 32u)] uint Shift,
+                                   [Values(0b0u, 0b1u)] uint Q) // <2S, 4S>
+        {
+            // The 7-bit shift field (opcode bits 16-22) holds 64 - Shift; Q lands in bit 30.
+            uint ShiftField = (64 - Shift) & 0x7F;
+            uint RegField   = ((Rn & 31) << 5) | (Rd & 31);
+            uint SizeField  = (Q & 1) << 30;
+            uint Opcode     = Opcodes | RegField | (ShiftField << 16) | SizeField;
+
+            // V0 starts with Z in both elements; the second element of V1 is zero when Q is 0.
+            Vector128<float> InitV0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> InitV1 = MakeVectorE0E1(A, A * Q);
+
+            SingleOpcode(Opcode, V0: InitV0, V1: InitV1);
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise]
+        public void ShrImm_V_2D([ValueSource("_ShrImm_V_2D_")] uint Opcodes,
+                                [Values(0u)]     uint Rd,
+                                [Values(1u, 0u)] uint Rn,
+                                [ValueSource("_1D_")] [Random(RndCnt)] ulong Z,
+                                [ValueSource("_1D_")] [Random(RndCnt)] ulong A,
+                                [Range(1u, 64u)] uint Shift)
+        {
+            // The 7-bit shift field (opcode bits 16-22) holds 128 - Shift.
+            uint ShiftField = (128 - Shift) & 0x7F;
+            uint RegField   = ((Rn & 31) << 5) | (Rd & 31);
+            uint Opcode     = Opcodes | RegField | (ShiftField << 16);
+
+            // V0 starts with Z in both elements; V1 carries A in both elements.
+            Vector128<float> InitV0 = MakeVectorE0E1(Z, Z);
+            Vector128<float> InitV1 = MakeVectorE0E1(A, A);
+
+            SingleOpcode(Opcode, V0: InitV0, V1: InitV1);
+            CompareAgainstUnicorn();
+        }
+#endif
+    }
+}