From c228cf320d476303da679066c67c3a8c9c6aa3e1 Mon Sep 17 00:00:00 2001
From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
Date: Tue, 3 Jul 2018 08:31:16 +0200
Subject: [PATCH] Add Rbit_V instruction. Add 8 tests (Rbit_V; Rev16_V,
 Rev32_V, Rev64_V). Improve CountSetBits8() algorithm. (#212)

* Update AOpCodeTable.cs

* Update AInstEmitSimdArithmetic.cs

* Update AInstEmitSimdLogical.cs

* Update AVectorHelper.cs

* Update ASoftFallback.cs

* Update Instructions.cs

* Update CpuTestSimd.cs

* Update CpuTestSimdReg.cs

* Improve CountSetBits8() algorithm.

* Improve CountSetBits8() algorithm.
---
 ChocolArm64/AOpCodeTable.cs                   |   1 +
 .../Instruction/AInstEmitSimdArithmetic.cs    |   4 +-
 .../Instruction/AInstEmitSimdLogical.cs       |  37 ++-
 ChocolArm64/Instruction/ASoftFallback.cs      |  24 +-
 ChocolArm64/Instruction/AVectorHelper.cs      |  10 +-
 Ryujinx.Tests/Cpu/CpuTestSimd.cs              | 174 ++++++++++-
 Ryujinx.Tests/Cpu/CpuTestSimdReg.cs           |   2 +-
 Ryujinx.Tests/Cpu/Tester/Instructions.cs      | 287 +++++++++++++++++-
 8 files changed, 502 insertions(+), 37 deletions(-)

diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs
index 09a6ca4a2b..7cef4398fa 100644
--- a/ChocolArm64/AOpCodeTable.cs
+++ b/ChocolArm64/AOpCodeTable.cs
@@ -348,6 +348,7 @@ namespace ChocolArm64
             SetA64("0x001110101xxxxx000111xxxxxxxxxx", AInstEmit.Orr_V,         typeof(AOpCodeSimdReg));
             SetA64("0x00111100000xxx<<x101xxxxxxxxxx", AInstEmit.Orr_Vi,        typeof(AOpCodeSimdImm));
             SetA64("0x101110<<1xxxxx010000xxxxxxxxxx", AInstEmit.Raddhn_V,      typeof(AOpCodeSimdReg));
+            SetA64("0x10111001100000010110xxxxxxxxxx", AInstEmit.Rbit_V,        typeof(AOpCodeSimd));
             SetA64("0x00111000100000000110xxxxxxxxxx", AInstEmit.Rev16_V,       typeof(AOpCodeSimd));
             SetA64("0x1011100x100000000010xxxxxxxxxx", AInstEmit.Rev32_V,       typeof(AOpCodeSimd));
             SetA64("0x001110<<100000000010xxxxxxxxxx", AInstEmit.Rev64_V,       typeof(AOpCodeSimd));
diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index e61979b0a1..06844526f0 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -151,9 +151,9 @@ namespace ChocolArm64.Instruction
             {
                 EmitVectorExtractZx(Context, Op.Rn, Index, 0);
 
-                Context.Emit(OpCodes.Conv_U1);
+                Context.Emit(OpCodes.Conv_U4);
 
-                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.CountSetBits8));
+                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8));
 
                 Context.Emit(OpCodes.Conv_U8);
 
diff --git a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs
index 163151f84d..8475a8a474 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs
@@ -56,8 +56,9 @@ namespace ChocolArm64.Instruction
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
             int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
 
-            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 EmitVectorExtractZx(Context, Op.Rd, Index, Op.Size);
                 EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
@@ -145,6 +146,31 @@ namespace ChocolArm64.Instruction
             EmitVectorImmBinaryOp(Context, () => Context.Emit(OpCodes.Or));
         }
 
+        public static void Rbit_V(AILEmitterCtx Context)
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+            // Emits IL for RBIT (vector): each byte lane of Rn is bit-reversed into the same lane of Rd.
+            int Elems = Op.RegisterSize == ARegisterSize.SIMD128 ? 16 : 8;
+            // One extract / reverse / insert sequence is emitted per byte lane (element size argument 0).
+            for (int Index = 0; Index < Elems; Index++)
+            {
+                EmitVectorExtractZx(Context, Op.Rn, Index, 0);
+                // Convert to uint32, the parameter type of ASoftFallback.ReverseBits8.
+                Context.Emit(OpCodes.Conv_U4);
+
+                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.ReverseBits8));
+                // Widen the uint result to uint64 before the insert (presumably what EmitVectorInsert expects - TODO confirm).
+                Context.Emit(OpCodes.Conv_U8);
+
+                EmitVectorInsert(Context, Op.Rd, Index, 0);
+            }
+            // 64-bit form only: clear the upper half of Rd (assumed from the helper's name - TODO confirm).
+            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
+        }
+
         public static void Rev16_V(AILEmitterCtx Context)
         {
             EmitRev_V(Context, ContainerSize: 1);
@@ -164,18 +190,17 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
-
-            int Elems = Bytes >> Op.Size;
-
             if (Op.Size >= ContainerSize)
             {
                 throw new InvalidOperationException();
             }
 
+            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
+
             int ContainerMask = (1 << (ContainerSize - Op.Size)) - 1;
 
-            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 int RevIndex = Index ^ ContainerMask;
 
diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs
index d626622ab5..5c0a9c8e3a 100644
--- a/ChocolArm64/Instruction/ASoftFallback.cs
+++ b/ChocolArm64/Instruction/ASoftFallback.cs
@@ -30,6 +30,14 @@ namespace ChocolArm64.Instruction
             return (ulong)Size;
         }
 
+        public static uint CountSetBits8(uint Value)
+        {
+            // Parallel popcount of the low byte: sum adjacent bits, then adjacent pairs, then the two nibbles.
+            uint Pairs   = (Value & 0x55) + ((Value >> 1) & 0x55);
+            uint Nibbles = (Pairs & 0x33) + ((Pairs >> 2) & 0x33);
+            return (Nibbles & 0x0f) + (Nibbles >> 4);
+        }
+
         private const uint Crc32RevPoly  = 0xedb88320;
         private const uint Crc32cRevPoly = 0x82f63b78;
 
@@ -89,6 +97,14 @@ namespace ChocolArm64.Instruction
             return Crc;
         }
 
+        public static uint ReverseBits8(uint Value)
+        {
+            // Mirror the low byte: swap adjacent bits, then adjacent bit pairs, then the two nibbles.
+            uint BitsSwapped  = ((Value & 0x55) << 1) | ((Value & 0xaa) >> 1);
+            uint PairsSwapped = ((BitsSwapped & 0x33) << 2) | ((BitsSwapped & 0xcc) >> 2);
+            return ((PairsSwapped & 0x0f) << 4) | (PairsSwapped >> 4);
+        }
+
         public static uint ReverseBits32(uint Value)
         {
             Value = ((Value & 0xaaaaaaaa) >> 1) | ((Value & 0x55555555) << 1);
@@ -101,10 +117,10 @@ namespace ChocolArm64.Instruction
 
         public static ulong ReverseBits64(ulong Value)
         {
-            Value = ((Value & 0xaaaaaaaaaaaaaaaa) >>  1) | ((Value & 0x5555555555555555) <<  1);
-            Value = ((Value & 0xcccccccccccccccc) >>  2) | ((Value & 0x3333333333333333) <<  2);
-            Value = ((Value & 0xf0f0f0f0f0f0f0f0) >>  4) | ((Value & 0x0f0f0f0f0f0f0f0f) <<  4);
-            Value = ((Value & 0xff00ff00ff00ff00) >>  8) | ((Value & 0x00ff00ff00ff00ff) <<  8);
+            Value = ((Value & 0xaaaaaaaaaaaaaaaa) >> 1 ) | ((Value & 0x5555555555555555) << 1 );
+            Value = ((Value & 0xcccccccccccccccc) >> 2 ) | ((Value & 0x3333333333333333) << 2 );
+            Value = ((Value & 0xf0f0f0f0f0f0f0f0) >> 4 ) | ((Value & 0x0f0f0f0f0f0f0f0f) << 4 );
+            Value = ((Value & 0xff00ff00ff00ff00) >> 8 ) | ((Value & 0x00ff00ff00ff00ff) << 8 );
             Value = ((Value & 0xffff0000ffff0000) >> 16) | ((Value & 0x0000ffff0000ffff) << 16);
 
             return (Value >> 32) | (Value << 32);
diff --git a/ChocolArm64/Instruction/AVectorHelper.cs b/ChocolArm64/Instruction/AVectorHelper.cs
index dbfaab7564..a0f887b043 100644
--- a/ChocolArm64/Instruction/AVectorHelper.cs
+++ b/ChocolArm64/Instruction/AVectorHelper.cs
@@ -93,14 +93,6 @@ namespace ChocolArm64.Instruction
                    Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
         }
 
-        public static int CountSetBits8(byte Value)
-        {
-            return ((Value >> 0) & 1) + ((Value >> 1) & 1) +
-                   ((Value >> 2) & 1) + ((Value >> 3) & 1) +
-                   ((Value >> 4) & 1) + ((Value >> 5) & 1) +
-                   ((Value >> 6) & 1) +  (Value >> 7);
-        }
-
         public static double Max(double LHS, double RHS)
         {
             if (LHS == 0.0 && RHS == 0.0)
@@ -646,4 +638,4 @@ namespace ChocolArm64.Instruction
             throw new PlatformNotSupportedException();
         }
     }
-}
\ No newline at end of file
+}
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
index 8bfa7e7c7c..02c5b25b24 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@@ -11,7 +11,7 @@ namespace Ryujinx.Tests.Cpu
     using Tester;
     using Tester.Types;
 
-    [Category("Simd")/*, Ignore("Tested: first half of 2018.")*/]
+    [Category("Simd")/*, Ignore("Tested: second half of 2018.")*/]
     public sealed class CpuTestSimd : CpuTest
     {
 #if Simd
@@ -775,6 +775,178 @@ namespace Ryujinx.Tests.Cpu
             });
         }
 
+        [Test, Description("RBIT <Vd>.<T>, <Vn>.<T>")]
+        public void Rbit_V_8B([ValueSource("_8B_")] [Random(1)] ulong A)
+        {
+            uint Opcode = 0x2E605820; // RBIT V0.8B, V1.8B
+            Bits Op = new Bits(Opcode);
+            // CPU under test: V1 = A; V0's E1 is pre-filled with a random value (presumably the upper 64 bits) so the Is.Zero check below is meaningful.
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            // Reference model: load A into its V1, then run RBIT with Q = bit 30, Rn = bits 9:5, Rd = bits 4:0 of the opcode.
+            AArch64.V(1, new Bits(A));
+            SimdFp.Rbit_V(Op[30], Op[9, 5], Op[4, 0]);
+            // E0 must equal the reference model's 64-bit V0; E1 must read back as zero for the 64-bit form.
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+            });
+        }
+
+        [Test, Pairwise, Description("RBIT <Vd>.<T>, <Vn>.<T>")]
+        public void Rbit_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0,
+                               [ValueSource("_8B_")] [Random(1)] ulong A1)
+        {
+            uint Opcode = 0x6E605820; // RBIT V0.16B, V1.16B
+            Bits Op = new Bits(Opcode);
+            // CPU under test: V1 is built from A0 and A1 (presumably low and high 64 bits - TODO confirm).
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            // Reference model: load A0/A1 into parts 0 and 1 of its V1, then run RBIT with the opcode's Q, Rn and Rd fields.
+            AArch64.Vpart(1, 0, new Bits(A0));
+            AArch64.Vpart(1, 1, new Bits(A1));
+            SimdFp.Rbit_V(Op[30], Op[9, 5], Op[4, 0]);
+            // Both 64-bit parts of V0 must match the reference model.
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Description("REV16 <Vd>.<T>, <Vn>.<T>")]
+        public void Rev16_V_8B([ValueSource("_8B_")] [Random(1)] ulong A)
+        {
+            uint Opcode = 0x0E201820; // REV16 V0.8B, V1.8B
+            Bits Op = new Bits(Opcode);
+            // CPU under test: V1 = A; V0's E1 is pre-filled with a random value (presumably the upper 64 bits) so the Is.Zero check below is meaningful.
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            // Reference model: load A into its V1, then run REV16 with Q = bit 30, size = bits 23:22, Rn = bits 9:5, Rd = bits 4:0.
+            AArch64.V(1, new Bits(A));
+            SimdFp.Rev16_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+            // E0 must equal the reference model's 64-bit V0; E1 must read back as zero for the 64-bit form.
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+            });
+        }
+
+        [Test, Pairwise, Description("REV16 <Vd>.<T>, <Vn>.<T>")]
+        public void Rev16_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0,
+                                [ValueSource("_8B_")] [Random(1)] ulong A1)
+        {
+            uint Opcode = 0x4E201820; // REV16 V0.16B, V1.16B
+            Bits Op = new Bits(Opcode);
+            // CPU under test: V1 is built from A0 and A1 (presumably low and high 64 bits - TODO confirm).
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            // Reference model: load A0/A1 into parts 0 and 1 of its V1, then run REV16 with the opcode's Q, size, Rn and Rd fields.
+            AArch64.Vpart(1, 0, new Bits(A0));
+            AArch64.Vpart(1, 1, new Bits(A1));
+            SimdFp.Rev16_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+            // Both 64-bit parts of V0 must match the reference model.
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Description("REV32 <Vd>.<T>, <Vn>.<T>")]
+        public void Rev32_V_8B_4H([ValueSource("_8B4H_")] [Random(1)] ulong A,
+                                  [Values(0b00u, 0b01u)] uint size) // <8B, 4H>
+        {
+            uint Opcode = 0x2E200820; // REV32 V0.8B, V1.8B (size field filled in below)
+            Opcode |= ((size & 3) << 22); // size field: bits 23:22
+            Bits Op = new Bits(Opcode);
+            // CPU under test: V1 = A; V0's E1 is pre-filled with a random value (presumably the upper 64 bits) so the Is.Zero check below is meaningful.
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            // Reference model: load A into its V1, then run REV32 with Q = bit 30, size = bits 23:22, Rn = bits 9:5, Rd = bits 4:0.
+            AArch64.V(1, new Bits(A));
+            SimdFp.Rev32_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+            // E0 must equal the reference model's 64-bit V0; E1 must read back as zero for the 64-bit form.
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+            });
+        }
+
+        [Test, Pairwise, Description("REV32 <Vd>.<T>, <Vn>.<T>")]
+        public void Rev32_V_16B_8H([ValueSource("_8B4H_")] [Random(1)] ulong A0,
+                                   [ValueSource("_8B4H_")] [Random(1)] ulong A1,
+                                   [Values(0b00u, 0b01u)] uint size) // <16B, 8H>
+        {
+            uint Opcode = 0x6E200820; // REV32 V0.16B, V1.16B (size field filled in below)
+            Opcode |= ((size & 3) << 22); // size field: bits 23:22
+            Bits Op = new Bits(Opcode);
+            // CPU under test: V1 is built from A0 and A1 (presumably low and high 64 bits - TODO confirm).
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            // Reference model: load A0/A1 into parts 0 and 1 of its V1, then run REV32 with the opcode's Q, size, Rn and Rd fields.
+            AArch64.Vpart(1, 0, new Bits(A0));
+            AArch64.Vpart(1, 1, new Bits(A1));
+            SimdFp.Rev32_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+            // Both 64-bit parts of V0 must match the reference model.
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
+        [Test, Description("REV64 <Vd>.<T>, <Vn>.<T>")]
+        public void Rev64_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A,
+                                     [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
+        {
+            uint Opcode = 0x0E200820; // REV64 V0.8B, V1.8B (size field filled in below)
+            Opcode |= ((size & 3) << 22); // size field: bits 23:22
+            Bits Op = new Bits(Opcode);
+            // CPU under test: V1 = A; V0's E1 is pre-filled with a random value (presumably the upper 64 bits) so the Is.Zero check below is meaningful.
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+            // Reference model: load A into its V1, then run REV64 with Q = bit 30, size = bits 23:22, Rn = bits 9:5, Rd = bits 4:0.
+            AArch64.V(1, new Bits(A));
+            SimdFp.Rev64_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+            // E0 must equal the reference model's 64-bit V0; E1 must read back as zero for the 64-bit form.
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
+            });
+        }
+
+        [Test, Pairwise, Description("REV64 <Vd>.<T>, <Vn>.<T>")]
+        public void Rev64_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0,
+                                      [ValueSource("_8B4H2S_")] [Random(1)] ulong A1,
+                                      [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S>
+        {
+            uint Opcode = 0x4E200820; // REV64 V0.16B, V1.16B (size field filled in below)
+            Opcode |= ((size & 3) << 22); // size field: bits 23:22
+            Bits Op = new Bits(Opcode);
+            // CPU under test: V1 is built from A0 and A1 (presumably low and high 64 bits - TODO confirm).
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);
+            // Reference model: load A0/A1 into parts 0 and 1 of its V1, then run REV64 with the opcode's Q, size, Rn and Rd fields.
+            AArch64.Vpart(1, 0, new Bits(A0));
+            AArch64.Vpart(1, 1, new Bits(A1));
+            SimdFp.Rev64_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]);
+            // Both 64-bit parts of V0 must match the reference model.
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+            });
+        }
+
         [Test, Description("SQXTN <Vb><d>, <Va><n>")]
         public void Sqxtn_S_HB_SH_DS([ValueSource("_1H1S1D_")] [Random(1)] ulong A,
                                      [Values(0b00u, 0b01u, 0b10u)] uint size) // <HB, SH, DS>
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
index 60cf1bd054..5e14f55d36 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
@@ -11,7 +11,7 @@ namespace Ryujinx.Tests.Cpu
     using Tester;
     using Tester.Types;
 
-    [Category("SimdReg")/*, Ignore("Tested: first half of 2018.")*/]
+    [Category("SimdReg")/*, Ignore("Tested: second half of 2018.")*/]
     public sealed class CpuTestSimdReg : CpuTest
     {
 #if SimdReg
diff --git a/Ryujinx.Tests/Cpu/Tester/Instructions.cs b/Ryujinx.Tests/Cpu/Tester/Instructions.cs
index a56aeac932..1590019a70 100644
--- a/Ryujinx.Tests/Cpu/Tester/Instructions.cs
+++ b/Ryujinx.Tests/Cpu/Tester/Instructions.cs
@@ -1974,13 +1974,13 @@ namespace Ryujinx.Tests.Cpu.Tester
 
             switch (Bits.Concat(op, U))
             {
-                default:
                 case Bits bits when bits == "00":
                     comparison = CompareOp.CompareOp_GT;
                     break;
                 case Bits bits when bits == "01":
                     comparison = CompareOp.CompareOp_GE;
                     break;
+                default:
                 case Bits bits when bits == "10":
                     comparison = CompareOp.CompareOp_EQ;
                     break;
@@ -2004,13 +2004,13 @@ namespace Ryujinx.Tests.Cpu.Tester
 
                 switch (comparison)
                 {
-                    default:
                     case CompareOp.CompareOp_GT:
                         test_passed = (element > (BigInteger)0);
                         break;
                     case CompareOp.CompareOp_GE:
                         test_passed = (element >= (BigInteger)0);
                         break;
+                    default:
                     case CompareOp.CompareOp_EQ:
                         test_passed = (element == (BigInteger)0);
                         break;
@@ -2048,13 +2048,13 @@ namespace Ryujinx.Tests.Cpu.Tester
 
             switch (Bits.Concat(op, U))
             {
-                default:
                 case Bits bits when bits == "00":
                     comparison = CompareOp.CompareOp_GT;
                     break;
                 case Bits bits when bits == "01":
                     comparison = CompareOp.CompareOp_GE;
                     break;
+                default:
                 case Bits bits when bits == "10":
                     comparison = CompareOp.CompareOp_EQ;
                     break;
@@ -2078,13 +2078,13 @@ namespace Ryujinx.Tests.Cpu.Tester
 
                 switch (comparison)
                 {
-                    default:
                     case CompareOp.CompareOp_GT:
                         test_passed = (element > (BigInteger)0);
                         break;
                     case CompareOp.CompareOp_GE:
                         test_passed = (element >= (BigInteger)0);
                         break;
+                    default:
                     case CompareOp.CompareOp_EQ:
                         test_passed = (element == (BigInteger)0);
                         break;
@@ -2122,10 +2122,10 @@ namespace Ryujinx.Tests.Cpu.Tester
 
             switch (Bits.Concat(op, U))
             {
-                default:
                 case Bits bits when bits == "00":
                     comparison = CompareOp.CompareOp_GT;
                     break;
+                default:
                 case Bits bits when bits == "01":
                     comparison = CompareOp.CompareOp_GE;
                     break;
@@ -2152,10 +2152,10 @@ namespace Ryujinx.Tests.Cpu.Tester
 
                 switch (comparison)
                 {
-                    default:
                     case CompareOp.CompareOp_GT:
                         test_passed = (element > (BigInteger)0);
                         break;
+                    default:
                     case CompareOp.CompareOp_GE:
                         test_passed = (element >= (BigInteger)0);
                         break;
@@ -2196,10 +2196,10 @@ namespace Ryujinx.Tests.Cpu.Tester
 
             switch (Bits.Concat(op, U))
             {
-                default:
                 case Bits bits when bits == "00":
                     comparison = CompareOp.CompareOp_GT;
                     break;
+                default:
                 case Bits bits when bits == "01":
                     comparison = CompareOp.CompareOp_GE;
                     break;
@@ -2226,10 +2226,10 @@ namespace Ryujinx.Tests.Cpu.Tester
 
                 switch (comparison)
                 {
-                    default:
                     case CompareOp.CompareOp_GT:
                         test_passed = (element > (BigInteger)0);
                         break;
+                    default:
                     case CompareOp.CompareOp_GE:
                         test_passed = (element >= (BigInteger)0);
                         break;
@@ -2418,7 +2418,6 @@ namespace Ryujinx.Tests.Cpu.Tester
 
             switch (Bits.Concat(op, U))
             {
-                default:
                 case Bits bits when bits == "00":
                     comparison = CompareOp.CompareOp_GT;
                     break;
@@ -2428,6 +2427,7 @@ namespace Ryujinx.Tests.Cpu.Tester
                 case Bits bits when bits == "10":
                     comparison = CompareOp.CompareOp_EQ;
                     break;
+                default:
                 case Bits bits when bits == "11":
                     comparison = CompareOp.CompareOp_LE;
                     break;
@@ -2448,7 +2448,6 @@ namespace Ryujinx.Tests.Cpu.Tester
 
                 switch (comparison)
                 {
-                    default:
                     case CompareOp.CompareOp_GT:
                         test_passed = (element > (BigInteger)0);
                         break;
@@ -2458,6 +2457,7 @@ namespace Ryujinx.Tests.Cpu.Tester
                     case CompareOp.CompareOp_EQ:
                         test_passed = (element == (BigInteger)0);
                         break;
+                    default:
                     case CompareOp.CompareOp_LE:
                         test_passed = (element <= (BigInteger)0);
                         break;
@@ -2492,7 +2492,6 @@ namespace Ryujinx.Tests.Cpu.Tester
 
             switch (Bits.Concat(op, U))
             {
-                default:
                 case Bits bits when bits == "00":
                     comparison = CompareOp.CompareOp_GT;
                     break;
@@ -2502,6 +2501,7 @@ namespace Ryujinx.Tests.Cpu.Tester
                 case Bits bits when bits == "10":
                     comparison = CompareOp.CompareOp_EQ;
                     break;
+                default:
                 case Bits bits when bits == "11":
                     comparison = CompareOp.CompareOp_LE;
                     break;
@@ -2522,7 +2522,6 @@ namespace Ryujinx.Tests.Cpu.Tester
 
                 switch (comparison)
                 {
-                    default:
                     case CompareOp.CompareOp_GT:
                         test_passed = (element > (BigInteger)0);
                         break;
@@ -2532,6 +2531,7 @@ namespace Ryujinx.Tests.Cpu.Tester
                     case CompareOp.CompareOp_EQ:
                         test_passed = (element == (BigInteger)0);
                         break;
+                    default:
                     case CompareOp.CompareOp_LE:
                         test_passed = (element <= (BigInteger)0);
                         break;
@@ -2576,7 +2576,6 @@ namespace Ryujinx.Tests.Cpu.Tester
 
                 switch (comparison)
                 {
-                    default:
                     case CompareOp.CompareOp_GT:
                         test_passed = (element > (BigInteger)0);
                         break;
@@ -2589,6 +2588,7 @@ namespace Ryujinx.Tests.Cpu.Tester
                     case CompareOp.CompareOp_LE:
                         test_passed = (element <= (BigInteger)0);
                         break;
+                    default:
                     case CompareOp.CompareOp_LT:
                         test_passed = (element < (BigInteger)0);
                         break;
@@ -2630,7 +2630,6 @@ namespace Ryujinx.Tests.Cpu.Tester
 
                 switch (comparison)
                 {
-                    default:
                     case CompareOp.CompareOp_GT:
                         test_passed = (element > (BigInteger)0);
                         break;
@@ -2643,6 +2642,7 @@ namespace Ryujinx.Tests.Cpu.Tester
                     case CompareOp.CompareOp_LE:
                         test_passed = (element <= (BigInteger)0);
                         break;
+                    default:
                     case CompareOp.CompareOp_LT:
                         test_passed = (element < (BigInteger)0);
                         break;
@@ -2801,6 +2801,265 @@ namespace Ryujinx.Tests.Cpu.Tester
             V(d, result);
         }
 
+        // rbit_advsimd.html - reference model of RBIT (vector): reverses the bit order inside each byte of Vn and writes the result to Vd.
+        public static void Rbit_V(bool Q, Bits Rn, Bits Rd)
+        {
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+            // Elements are always 8 bits wide here; Q only selects a 128-bit or 64-bit vector.
+            int esize = 8;
+            int datasize = (Q ? 128 : 64);
+            int elements = datasize / 8;
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+            Bits element;
+            Bits rev = new Bits(esize);
+
+            for (int e = 0; e <= elements - 1; e++)
+            {
+                element = Elem(operand, e, esize);
+                // Bit i of the source element becomes bit (esize - 1 - i) of rev.
+                for (int i = 0; i <= esize - 1; i++)
+                {
+                    rev[esize - 1 - i] = element[i];
+                }
+                // rev is fully overwritten on every pass, so one buffer is reused (assumes Elem copies the bits into result - TODO confirm).
+                Elem(result, e, esize, rev);
+            }
+
+            V(d, result);
+        }
+
+        // rev16_advsimd.html - reference model of REV16 (vector): reverses the order of the elements inside each 16-bit container of Vn.
+        public static void Rev16_V(bool Q, Bits size, Bits Rn, Bits Rd)
+        {
+            const bool U = false;
+            const bool o0 = true;
+            // o0:U = "10" is the fixed value for REV16; it selects 16-bit containers in the switch below.
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+
+            // size=esize:   B(0),  H(1),  S(2), D(3)
+            int esize = 8 << (int)UInt(size);
+            int datasize = (Q ? 128 : 64);
+
+            // op=REVx: 64(0), 32(1), 16(2)
+            Bits op = Bits.Concat(o0, U);
+
+            // => op+size:
+            //    64+B = 0, 64+H = 1, 64+S = 2, 64+D = X
+            //    32+B = 1, 32+H = 2, 32+S = X, 32+D = X
+            //    16+B = 2, 16+H = X, 16+S = X, 16+D = X
+            //     8+B = X,  8+H = X,  8+S = X,  8+D = X
+            // => 3-(op+size) (index bits in group)
+            //    64+B = 3, 64+H = 2, 64+S = 1, 64+D = X
+            //    32+B = 2, 32+H = 1, 32+S = X, 32+D = X
+            //    16+B = 1, 16+H = X, 16+S = X, 16+D = X
+            //     8+B = X,  8+H = X,  8+S = X,  8+D = X
+
+            // index bits within group: 1, 2, 3
+            /* if UInt(op) + UInt(size) >= 3 then UnallocatedEncoding(); */
+
+            int container_size;
+
+            switch (op)
+            {
+                default:
+                case Bits bits when bits == "10":
+                    container_size = 16;
+                    break;
+                case Bits bits when bits == "01":
+                    container_size = 32;
+                    break;
+                case Bits bits when bits == "00":
+                    container_size = 64;
+                    break;
+            }
+
+            int containers = datasize / container_size;
+            int elements_per_container = container_size / esize;
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+
+            int element = 0;
+            int rev_element;
+            // Copy each container's elements in reverse order; element keeps counting across containers.
+            for (int c = 0; c <= containers - 1; c++)
+            {
+                rev_element = element + elements_per_container - 1;
+
+                for (int e = 0; e <= elements_per_container - 1; e++)
+                {
+                    Elem(result, rev_element, esize, Elem(operand, element, esize));
+
+                    element = element + 1;
+                    rev_element = rev_element - 1;
+                }
+            }
+
+            V(d, result);
+        }
+
+        // rev32_advsimd.html - reference model of REV32 (vector): reverses the order of the elements inside each 32-bit container of Vn.
+        public static void Rev32_V(bool Q, Bits size, Bits Rn, Bits Rd)
+        {
+            const bool U = true;
+            const bool o0 = false;
+            // o0:U = "01" is the fixed value for REV32; it selects 32-bit containers in the switch below.
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+
+            // size=esize:   B(0),  H(1),  S(2), D(3)
+            int esize = 8 << (int)UInt(size);
+            int datasize = (Q ? 128 : 64);
+
+            // op=REVx: 64(0), 32(1), 16(2)
+            Bits op = Bits.Concat(o0, U);
+
+            // => op+size:
+            //    64+B = 0, 64+H = 1, 64+S = 2, 64+D = X
+            //    32+B = 1, 32+H = 2, 32+S = X, 32+D = X
+            //    16+B = 2, 16+H = X, 16+S = X, 16+D = X
+            //     8+B = X,  8+H = X,  8+S = X,  8+D = X
+            // => 3-(op+size) (index bits in group)
+            //    64+B = 3, 64+H = 2, 64+S = 1, 64+D = X
+            //    32+B = 2, 32+H = 1, 32+S = X, 32+D = X
+            //    16+B = 1, 16+H = X, 16+S = X, 16+D = X
+            //     8+B = X,  8+H = X,  8+S = X,  8+D = X
+
+            // index bits within group: 1, 2, 3
+            /* if UInt(op) + UInt(size) >= 3 then UnallocatedEncoding(); */
+
+            int container_size;
+
+            switch (op)
+            {
+                case Bits bits when bits == "10":
+                    container_size = 16;
+                    break;
+                default:
+                case Bits bits when bits == "01":
+                    container_size = 32;
+                    break;
+                case Bits bits when bits == "00":
+                    container_size = 64;
+                    break;
+            }
+
+            int containers = datasize / container_size;
+            int elements_per_container = container_size / esize;
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+
+            int element = 0;
+            int rev_element;
+            // Copy each container's elements in reverse order; element keeps counting across containers.
+            for (int c = 0; c <= containers - 1; c++)
+            {
+                rev_element = element + elements_per_container - 1;
+
+                for (int e = 0; e <= elements_per_container - 1; e++)
+                {
+                    Elem(result, rev_element, esize, Elem(operand, element, esize));
+
+                    element = element + 1;
+                    rev_element = rev_element - 1;
+                }
+            }
+
+            V(d, result);
+        }
+
+        // rev64_advsimd.html - reference model of REV64 (vector): reverses the order of the elements inside each 64-bit container of Vn.
+        public static void Rev64_V(bool Q, Bits size, Bits Rn, Bits Rd)
+        {
+            const bool U = false;
+            const bool o0 = false;
+            // o0:U = "00" is the fixed value for REV64; it selects 64-bit containers in the switch below.
+            /* Decode Vector */
+            int d = (int)UInt(Rd);
+            int n = (int)UInt(Rn);
+
+            // size=esize:   B(0),  H(1),  S(2), D(3)
+            int esize = 8 << (int)UInt(size);
+            int datasize = (Q ? 128 : 64);
+
+            // op=REVx: 64(0), 32(1), 16(2)
+            Bits op = Bits.Concat(o0, U);
+
+            // => op+size:
+            //    64+B = 0, 64+H = 1, 64+S = 2, 64+D = X
+            //    32+B = 1, 32+H = 2, 32+S = X, 32+D = X
+            //    16+B = 2, 16+H = X, 16+S = X, 16+D = X
+            //     8+B = X,  8+H = X,  8+S = X,  8+D = X
+            // => 3-(op+size) (index bits in group)
+            //    64+B = 3, 64+H = 2, 64+S = 1, 64+D = X
+            //    32+B = 2, 32+H = 1, 32+S = X, 32+D = X
+            //    16+B = 1, 16+H = X, 16+S = X, 16+D = X
+            //     8+B = X,  8+H = X,  8+S = X,  8+D = X
+
+            // index bits within group: 1, 2, 3
+            /* if UInt(op) + UInt(size) >= 3 then UnallocatedEncoding(); */
+
+            int container_size;
+
+            switch (op)
+            {
+                case Bits bits when bits == "10":
+                    container_size = 16;
+                    break;
+                case Bits bits when bits == "01":
+                    container_size = 32;
+                    break;
+                default:
+                case Bits bits when bits == "00":
+                    container_size = 64;
+                    break;
+            }
+
+            int containers = datasize / container_size;
+            int elements_per_container = container_size / esize;
+
+            /* Operation */
+            /* CheckFPAdvSIMDEnabled64(); */
+
+            Bits result = new Bits(datasize);
+            Bits operand = V(datasize, n);
+
+            int element = 0;
+            int rev_element;
+            // Copy each container's elements in reverse order; element keeps counting across containers.
+            for (int c = 0; c <= containers - 1; c++)
+            {
+                rev_element = element + elements_per_container - 1;
+
+                for (int e = 0; e <= elements_per_container - 1; e++)
+                {
+                    Elem(result, rev_element, esize, Elem(operand, element, esize));
+
+                    element = element + 1;
+                    rev_element = rev_element - 1;
+                }
+            }
+
+            V(d, result);
+        }
+
         // sqxtn_advsimd.html#SQXTN_asisdmisc_N
         public static void Sqxtn_S(Bits size, Bits Rn, Bits Rd)
         {