From c228cf320d476303da679066c67c3a8c9c6aa3e1 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Tue, 3 Jul 2018 08:31:16 +0200 Subject: [PATCH] Add Rbit_V instruction. Add 8 tests (Rbit_V; Rev16_V, Rev32_V, Rev64_V). Improve CountSetBits8() algorithm. (#212) * Update AOpCodeTable.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdLogical.cs * Update AVectorHelper.cs * Update ASoftFallback.cs * Update Instructions.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Improve CountSetBits8() algorithm. * Improve CountSetBits8() algorithm. --- ChocolArm64/AOpCodeTable.cs | 1 + .../Instruction/AInstEmitSimdArithmetic.cs | 4 +- .../Instruction/AInstEmitSimdLogical.cs | 37 ++- ChocolArm64/Instruction/ASoftFallback.cs | 24 +- ChocolArm64/Instruction/AVectorHelper.cs | 10 +- Ryujinx.Tests/Cpu/CpuTestSimd.cs | 174 ++++++++++- Ryujinx.Tests/Cpu/CpuTestSimdReg.cs | 2 +- Ryujinx.Tests/Cpu/Tester/Instructions.cs | 287 +++++++++++++++++- 8 files changed, 502 insertions(+), 37 deletions(-) diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index 09a6ca4a2b..7cef4398fa 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -348,6 +348,7 @@ namespace ChocolArm64 SetA64("0x001110101xxxxx000111xxxxxxxxxx", AInstEmit.Orr_V, typeof(AOpCodeSimdReg)); SetA64("0x00111100000xxx<> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rd, Index, Op.Size); EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -145,6 +146,31 @@ namespace ChocolArm64.Instruction EmitVectorImmBinaryOp(Context, () => Context.Emit(OpCodes.Or)); } + public static void Rbit_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int Elems = Op.RegisterSize == ARegisterSize.SIMD128 ? 
16 : 8; + + for (int Index = 0; Index < Elems; Index++) + { + EmitVectorExtractZx(Context, Op.Rn, Index, 0); + + Context.Emit(OpCodes.Conv_U4); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.ReverseBits8)); + + Context.Emit(OpCodes.Conv_U8); + + EmitVectorInsert(Context, Op.Rd, Index, 0); + } + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + public static void Rev16_V(AILEmitterCtx Context) { EmitRev_V(Context, ContainerSize: 1); @@ -164,18 +190,17 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; - - int Elems = Bytes >> Op.Size; - if (Op.Size >= ContainerSize) { throw new InvalidOperationException(); } + int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; + int ContainerMask = (1 << (ContainerSize - Op.Size)) - 1; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { int RevIndex = Index ^ ContainerMask; diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs index d626622ab5..5c0a9c8e3a 100644 --- a/ChocolArm64/Instruction/ASoftFallback.cs +++ b/ChocolArm64/Instruction/ASoftFallback.cs @@ -30,6 +30,14 @@ namespace ChocolArm64.Instruction return (ulong)Size; } + public static uint CountSetBits8(uint Value) + { + Value = ((Value >> 1) & 0x55) + (Value & 0x55); + Value = ((Value >> 2) & 0x33) + (Value & 0x33); + + return (Value >> 4) + (Value & 0x0f); + } + private const uint Crc32RevPoly = 0xedb88320; private const uint Crc32cRevPoly = 0x82f63b78; @@ -89,6 +97,14 @@ namespace ChocolArm64.Instruction return Crc; } + public static uint ReverseBits8(uint Value) + { + Value = ((Value & 0xaa) >> 1) | ((Value & 0x55) << 1); + Value = ((Value & 0xcc) >> 2) | ((Value & 0x33) << 2); + + return (Value >> 4) | ((Value & 0x0f) << 4); + } + public static uint ReverseBits32(uint Value) { Value = ((Value & 
0xaaaaaaaa) >> 1) | ((Value & 0x55555555) << 1); @@ -101,10 +117,10 @@ namespace ChocolArm64.Instruction public static ulong ReverseBits64(ulong Value) { - Value = ((Value & 0xaaaaaaaaaaaaaaaa) >> 1) | ((Value & 0x5555555555555555) << 1); - Value = ((Value & 0xcccccccccccccccc) >> 2) | ((Value & 0x3333333333333333) << 2); - Value = ((Value & 0xf0f0f0f0f0f0f0f0) >> 4) | ((Value & 0x0f0f0f0f0f0f0f0f) << 4); - Value = ((Value & 0xff00ff00ff00ff00) >> 8) | ((Value & 0x00ff00ff00ff00ff) << 8); + Value = ((Value & 0xaaaaaaaaaaaaaaaa) >> 1 ) | ((Value & 0x5555555555555555) << 1 ); + Value = ((Value & 0xcccccccccccccccc) >> 2 ) | ((Value & 0x3333333333333333) << 2 ); + Value = ((Value & 0xf0f0f0f0f0f0f0f0) >> 4 ) | ((Value & 0x0f0f0f0f0f0f0f0f) << 4 ); + Value = ((Value & 0xff00ff00ff00ff00) >> 8 ) | ((Value & 0x00ff00ff00ff00ff) << 8 ); Value = ((Value & 0xffff0000ffff0000) >> 16) | ((Value & 0x0000ffff0000ffff) << 16); return (Value >> 32) | (Value << 32); diff --git a/ChocolArm64/Instruction/AVectorHelper.cs b/ChocolArm64/Instruction/AVectorHelper.cs index dbfaab7564..a0f887b043 100644 --- a/ChocolArm64/Instruction/AVectorHelper.cs +++ b/ChocolArm64/Instruction/AVectorHelper.cs @@ -93,14 +93,6 @@ namespace ChocolArm64.Instruction Value < ulong.MinValue ? 
ulong.MinValue : (ulong)Value; } - public static int CountSetBits8(byte Value) - { - return ((Value >> 0) & 1) + ((Value >> 1) & 1) + - ((Value >> 2) & 1) + ((Value >> 3) & 1) + - ((Value >> 4) & 1) + ((Value >> 5) & 1) + - ((Value >> 6) & 1) + (Value >> 7); - } - public static double Max(double LHS, double RHS) { if (LHS == 0.0 && RHS == 0.0) @@ -646,4 +638,4 @@ namespace ChocolArm64.Instruction throw new PlatformNotSupportedException(); } } -} \ No newline at end of file +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index 8bfa7e7c7c..02c5b25b24 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -11,7 +11,7 @@ namespace Ryujinx.Tests.Cpu using Tester; using Tester.Types; - [Category("Simd")/*, Ignore("Tested: first half of 2018.")*/] + [Category("Simd")/*, Ignore("Tested: second half of 2018.")*/] public sealed class CpuTestSimd : CpuTest { #if Simd @@ -775,6 +775,178 @@ namespace Ryujinx.Tests.Cpu }); } + [Test, Description("RBIT ., .")] + public void Rbit_V_8B([ValueSource("_8B_")] [Random(1)] ulong A) + { + uint Opcode = 0x2E605820; // RBIT V0.8B, V1.8B + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.V(1, new Bits(A)); + SimdFp.Rbit_V(Op[30], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + }); + } + + [Test, Pairwise, Description("RBIT ., .")] + public void Rbit_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, + [ValueSource("_8B_")] [Random(1)] ulong A1) + { + uint Opcode = 0x6E605820; // RBIT V0.16B, V1.16B + Bits Op = new Bits(Opcode); + + Vector128 V1 = MakeVectorE0E1(A0, A1); + AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + + AArch64.Vpart(1, 0, new 
Bits(A0)); + AArch64.Vpart(1, 1, new Bits(A1)); + SimdFp.Rbit_V(Op[30], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("REV16 ., .")] + public void Rev16_V_8B([ValueSource("_8B_")] [Random(1)] ulong A) + { + uint Opcode = 0x0E201820; // REV16 V0.8B, V1.8B + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.V(1, new Bits(A)); + SimdFp.Rev16_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + }); + } + + [Test, Pairwise, Description("REV16 ., .")] + public void Rev16_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, + [ValueSource("_8B_")] [Random(1)] ulong A1) + { + uint Opcode = 0x4E201820; // REV16 V0.16B, V1.16B + Bits Op = new Bits(Opcode); + + Vector128 V1 = MakeVectorE0E1(A0, A1); + AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + + AArch64.Vpart(1, 0, new Bits(A0)); + AArch64.Vpart(1, 1, new Bits(A1)); + SimdFp.Rev16_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("REV32 ., .")] + public void Rev32_V_8B_4H([ValueSource("_8B4H_")] [Random(1)] ulong A, + [Values(0b00u, 0b01u)] uint size) // <8B, 4H> + { + uint Opcode = 0x2E200820; // REV32 V0.8B, V1.8B + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = 
MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.V(1, new Bits(A)); + SimdFp.Rev32_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + }); + } + + [Test, Pairwise, Description("REV32 ., .")] + public void Rev32_V_16B_8H([ValueSource("_8B4H_")] [Random(1)] ulong A0, + [ValueSource("_8B4H_")] [Random(1)] ulong A1, + [Values(0b00u, 0b01u)] uint size) // <16B, 8H> + { + uint Opcode = 0x6E200820; // REV32 V0.16B, V1.16B + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V1 = MakeVectorE0E1(A0, A1); + AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + + AArch64.Vpart(1, 0, new Bits(A0)); + AArch64.Vpart(1, 1, new Bits(A1)); + SimdFp.Rev32_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("REV64 ., .")] + public void Rev64_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x0E200820; // REV64 V0.8B, V1.8B + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.V(1, new Bits(A)); + SimdFp.Rev64_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + }); + } + + [Test, Pairwise, 
Description("REV64 ., .")] + public void Rev64_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0, + [ValueSource("_8B4H2S_")] [Random(1)] ulong A1, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + { + uint Opcode = 0x4E200820; // REV64 V0.16B, V1.16B + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V1 = MakeVectorE0E1(A0, A1); + AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + + AArch64.Vpart(1, 0, new Bits(A0)); + AArch64.Vpart(1, 1, new Bits(A1)); + SimdFp.Rev64_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Description("SQXTN , ")] public void Sqxtn_S_HB_SH_DS([ValueSource("_1H1S1D_")] [Random(1)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index 60cf1bd054..5e14f55d36 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -11,7 +11,7 @@ namespace Ryujinx.Tests.Cpu using Tester; using Tester.Types; - [Category("SimdReg")/*, Ignore("Tested: first half of 2018.")*/] + [Category("SimdReg")/*, Ignore("Tested: second half of 2018.")*/] public sealed class CpuTestSimdReg : CpuTest { #if SimdReg diff --git a/Ryujinx.Tests/Cpu/Tester/Instructions.cs b/Ryujinx.Tests/Cpu/Tester/Instructions.cs index a56aeac932..1590019a70 100644 --- a/Ryujinx.Tests/Cpu/Tester/Instructions.cs +++ b/Ryujinx.Tests/Cpu/Tester/Instructions.cs @@ -1974,13 +1974,13 @@ namespace Ryujinx.Tests.Cpu.Tester switch (Bits.Concat(op, U)) { - default: case Bits bits when bits == "00": comparison = CompareOp.CompareOp_GT; break; case Bits bits when bits == "01": comparison = CompareOp.CompareOp_GE; break; + default: case Bits bits when bits == "10": comparison = CompareOp.CompareOp_EQ; 
break; @@ -2004,13 +2004,13 @@ namespace Ryujinx.Tests.Cpu.Tester switch (comparison) { - default: case CompareOp.CompareOp_GT: test_passed = (element > (BigInteger)0); break; case CompareOp.CompareOp_GE: test_passed = (element >= (BigInteger)0); break; + default: case CompareOp.CompareOp_EQ: test_passed = (element == (BigInteger)0); break; @@ -2048,13 +2048,13 @@ namespace Ryujinx.Tests.Cpu.Tester switch (Bits.Concat(op, U)) { - default: case Bits bits when bits == "00": comparison = CompareOp.CompareOp_GT; break; case Bits bits when bits == "01": comparison = CompareOp.CompareOp_GE; break; + default: case Bits bits when bits == "10": comparison = CompareOp.CompareOp_EQ; break; @@ -2078,13 +2078,13 @@ namespace Ryujinx.Tests.Cpu.Tester switch (comparison) { - default: case CompareOp.CompareOp_GT: test_passed = (element > (BigInteger)0); break; case CompareOp.CompareOp_GE: test_passed = (element >= (BigInteger)0); break; + default: case CompareOp.CompareOp_EQ: test_passed = (element == (BigInteger)0); break; @@ -2122,10 +2122,10 @@ namespace Ryujinx.Tests.Cpu.Tester switch (Bits.Concat(op, U)) { - default: case Bits bits when bits == "00": comparison = CompareOp.CompareOp_GT; break; + default: case Bits bits when bits == "01": comparison = CompareOp.CompareOp_GE; break; @@ -2152,10 +2152,10 @@ namespace Ryujinx.Tests.Cpu.Tester switch (comparison) { - default: case CompareOp.CompareOp_GT: test_passed = (element > (BigInteger)0); break; + default: case CompareOp.CompareOp_GE: test_passed = (element >= (BigInteger)0); break; @@ -2196,10 +2196,10 @@ namespace Ryujinx.Tests.Cpu.Tester switch (Bits.Concat(op, U)) { - default: case Bits bits when bits == "00": comparison = CompareOp.CompareOp_GT; break; + default: case Bits bits when bits == "01": comparison = CompareOp.CompareOp_GE; break; @@ -2226,10 +2226,10 @@ namespace Ryujinx.Tests.Cpu.Tester switch (comparison) { - default: case CompareOp.CompareOp_GT: test_passed = (element > (BigInteger)0); break; + default: 
case CompareOp.CompareOp_GE: test_passed = (element >= (BigInteger)0); break; @@ -2418,7 +2418,6 @@ namespace Ryujinx.Tests.Cpu.Tester switch (Bits.Concat(op, U)) { - default: case Bits bits when bits == "00": comparison = CompareOp.CompareOp_GT; break; @@ -2428,6 +2427,7 @@ namespace Ryujinx.Tests.Cpu.Tester case Bits bits when bits == "10": comparison = CompareOp.CompareOp_EQ; break; + default: case Bits bits when bits == "11": comparison = CompareOp.CompareOp_LE; break; @@ -2448,7 +2448,6 @@ namespace Ryujinx.Tests.Cpu.Tester switch (comparison) { - default: case CompareOp.CompareOp_GT: test_passed = (element > (BigInteger)0); break; @@ -2458,6 +2457,7 @@ namespace Ryujinx.Tests.Cpu.Tester case CompareOp.CompareOp_EQ: test_passed = (element == (BigInteger)0); break; + default: case CompareOp.CompareOp_LE: test_passed = (element <= (BigInteger)0); break; @@ -2492,7 +2492,6 @@ namespace Ryujinx.Tests.Cpu.Tester switch (Bits.Concat(op, U)) { - default: case Bits bits when bits == "00": comparison = CompareOp.CompareOp_GT; break; @@ -2502,6 +2501,7 @@ namespace Ryujinx.Tests.Cpu.Tester case Bits bits when bits == "10": comparison = CompareOp.CompareOp_EQ; break; + default: case Bits bits when bits == "11": comparison = CompareOp.CompareOp_LE; break; @@ -2522,7 +2522,6 @@ namespace Ryujinx.Tests.Cpu.Tester switch (comparison) { - default: case CompareOp.CompareOp_GT: test_passed = (element > (BigInteger)0); break; @@ -2532,6 +2531,7 @@ namespace Ryujinx.Tests.Cpu.Tester case CompareOp.CompareOp_EQ: test_passed = (element == (BigInteger)0); break; + default: case CompareOp.CompareOp_LE: test_passed = (element <= (BigInteger)0); break; @@ -2576,7 +2576,6 @@ namespace Ryujinx.Tests.Cpu.Tester switch (comparison) { - default: case CompareOp.CompareOp_GT: test_passed = (element > (BigInteger)0); break; @@ -2589,6 +2588,7 @@ namespace Ryujinx.Tests.Cpu.Tester case CompareOp.CompareOp_LE: test_passed = (element <= (BigInteger)0); break; + default: case 
CompareOp.CompareOp_LT: test_passed = (element < (BigInteger)0); break; @@ -2630,7 +2630,6 @@ namespace Ryujinx.Tests.Cpu.Tester switch (comparison) { - default: case CompareOp.CompareOp_GT: test_passed = (element > (BigInteger)0); break; @@ -2643,6 +2642,7 @@ namespace Ryujinx.Tests.Cpu.Tester case CompareOp.CompareOp_LE: test_passed = (element <= (BigInteger)0); break; + default: case CompareOp.CompareOp_LT: test_passed = (element < (BigInteger)0); break; @@ -2801,6 +2801,265 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // rbit_advsimd.html + public static void Rbit_V(bool Q, Bits Rn, Bits Rd) + { + /* Decode Vector */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + + int esize = 8; + int datasize = (Q ? 128 : 64); + int elements = datasize / 8; + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand = V(datasize, n); + Bits element; + Bits rev = new Bits(esize); + + for (int e = 0; e <= elements - 1; e++) + { + element = Elem(operand, e, esize); + + for (int i = 0; i <= esize - 1; i++) + { + rev[esize - 1 - i] = element[i]; + } + + Elem(result, e, esize, rev); + } + + V(d, result); + } + + // rev16_advsimd.html + public static void Rev16_V(bool Q, Bits size, Bits Rn, Bits Rd) + { + const bool U = false; + const bool o0 = true; + + /* Decode Vector */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + + // size=esize: B(0), H(1), S(2), D(3) + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 
128 : 64); + + // op=REVx: 64(0), 32(1), 16(2) + Bits op = Bits.Concat(o0, U); + + // => op+size: + // 64+B = 0, 64+H = 1, 64+S = 2, 64+D = X + // 32+B = 1, 32+H = 2, 32+S = X, 32+D = X + // 16+B = 2, 16+H = X, 16+S = X, 16+D = X + // 8+B = X, 8+H = X, 8+S = X, 8+D = X + // => 3-(op+size) (index bits in group) + // 64/B = 3, 64+H = 2, 64+S = 1, 64+D = X + // 32+B = 2, 32+H = 1, 32+S = X, 32+D = X + // 16+B = 1, 16+H = X, 16+S = X, 16+D = X + // 8+B = X, 8+H = X, 8+S = X, 8+D = X + + // index bits within group: 1, 2, 3 + /* if UInt(op) + UInt(size) >= 3 then UnallocatedEncoding(); */ + + int container_size; + + switch (op) + { + default: + case Bits bits when bits == "10": + container_size = 16; + break; + case Bits bits when bits == "01": + container_size = 32; + break; + case Bits bits when bits == "00": + container_size = 64; + break; + } + + int containers = datasize / container_size; + int elements_per_container = container_size / esize; + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand = V(datasize, n); + + int element = 0; + int rev_element; + + for (int c = 0; c <= containers - 1; c++) + { + rev_element = element + elements_per_container - 1; + + for (int e = 0; e <= elements_per_container - 1; e++) + { + Elem(result, rev_element, esize, Elem(operand, element, esize)); + + element = element + 1; + rev_element = rev_element - 1; + } + } + + V(d, result); + } + + // rev32_advsimd.html + public static void Rev32_V(bool Q, Bits size, Bits Rn, Bits Rd) + { + const bool U = true; + const bool o0 = false; + + /* Decode Vector */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + + // size=esize: B(0), H(1), S(2), D(3) + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 
128 : 64); + + // op=REVx: 64(0), 32(1), 16(2) + Bits op = Bits.Concat(o0, U); + + // => op+size: + // 64+B = 0, 64+H = 1, 64+S = 2, 64+D = X + // 32+B = 1, 32+H = 2, 32+S = X, 32+D = X + // 16+B = 2, 16+H = X, 16+S = X, 16+D = X + // 8+B = X, 8+H = X, 8+S = X, 8+D = X + // => 3-(op+size) (index bits in group) + // 64/B = 3, 64+H = 2, 64+S = 1, 64+D = X + // 32+B = 2, 32+H = 1, 32+S = X, 32+D = X + // 16+B = 1, 16+H = X, 16+S = X, 16+D = X + // 8+B = X, 8+H = X, 8+S = X, 8+D = X + + // index bits within group: 1, 2, 3 + /* if UInt(op) + UInt(size) >= 3 then UnallocatedEncoding(); */ + + int container_size; + + switch (op) + { + case Bits bits when bits == "10": + container_size = 16; + break; + default: + case Bits bits when bits == "01": + container_size = 32; + break; + case Bits bits when bits == "00": + container_size = 64; + break; + } + + int containers = datasize / container_size; + int elements_per_container = container_size / esize; + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand = V(datasize, n); + + int element = 0; + int rev_element; + + for (int c = 0; c <= containers - 1; c++) + { + rev_element = element + elements_per_container - 1; + + for (int e = 0; e <= elements_per_container - 1; e++) + { + Elem(result, rev_element, esize, Elem(operand, element, esize)); + + element = element + 1; + rev_element = rev_element - 1; + } + } + + V(d, result); + } + + // rev64_advsimd.html + public static void Rev64_V(bool Q, Bits size, Bits Rn, Bits Rd) + { + const bool U = false; + const bool o0 = false; + + /* Decode Vector */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + + // size=esize: B(0), H(1), S(2), D(3) + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 
128 : 64); + + // op=REVx: 64(0), 32(1), 16(2) + Bits op = Bits.Concat(o0, U); + + // => op+size: + // 64+B = 0, 64+H = 1, 64+S = 2, 64+D = X + // 32+B = 1, 32+H = 2, 32+S = X, 32+D = X + // 16+B = 2, 16+H = X, 16+S = X, 16+D = X + // 8+B = X, 8+H = X, 8+S = X, 8+D = X + // => 3-(op+size) (index bits in group) + // 64/B = 3, 64+H = 2, 64+S = 1, 64+D = X + // 32+B = 2, 32+H = 1, 32+S = X, 32+D = X + // 16+B = 1, 16+H = X, 16+S = X, 16+D = X + // 8+B = X, 8+H = X, 8+S = X, 8+D = X + + // index bits within group: 1, 2, 3 + /* if UInt(op) + UInt(size) >= 3 then UnallocatedEncoding(); */ + + int container_size; + + switch (op) + { + case Bits bits when bits == "10": + container_size = 16; + break; + case Bits bits when bits == "01": + container_size = 32; + break; + default: + case Bits bits when bits == "00": + container_size = 64; + break; + } + + int containers = datasize / container_size; + int elements_per_container = container_size / esize; + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand = V(datasize, n); + + int element = 0; + int rev_element; + + for (int c = 0; c <= containers - 1; c++) + { + rev_element = element + elements_per_container - 1; + + for (int e = 0; e <= elements_per_container - 1; e++) + { + Elem(result, rev_element, esize, Elem(operand, element, esize)); + + element = element + 1; + rev_element = rev_element - 1; + } + } + + V(d, result); + } + // sqxtn_advsimd.html#SQXTN_asisdmisc_N public static void Sqxtn_S(Bits size, Bits Rn, Bits Rd) {