From 8f6387128ad6fc6a6106d1347f86ea97e549f5a2 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Tue, 26 Jun 2018 03:32:29 +0200 Subject: [PATCH] Add Sse Opt. for Cmeq_V_2D, Cmgt_V_2D (Reg). Add Sse Opt. for Crc32cb, Crc32ch, Crc32cw, Crc32cx. Add 10 simple tests for Fcmgt, Fcmge, Fcmeq, Fcmle, Fcmlt (S, V) (Reg, Zero). Add 2 Cnt_V tests. (#183) * Add files via upload * Add files via upload * Add files via upload * CPE * Add EmitSse42Crc32() * Update CpuTestSimdCmp.cs * Update Pseudocode.cs * Update Instructions.cs * Update CpuTestSimd.cs * Update Instructions.cs --- ChocolArm64/AOpCodeTable.cs | 6 +- ChocolArm64/AOptimizations.cs | 12 +- ChocolArm64/Instruction/AInstEmitHash.cs | 52 ++- .../Instruction/AInstEmitSimdArithmetic.cs | 32 +- ChocolArm64/Instruction/AInstEmitSimdCmp.cs | 60 ++- .../Instruction/AInstEmitSimdHelper.cs | 64 +-- Ryujinx.Tests/Cpu/CpuTest.cs | 33 +- Ryujinx.Tests/Cpu/CpuTestSimd.cs | 41 ++ Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs | 22 +- Ryujinx.Tests/Cpu/CpuTestSimdCmp.cs | 375 ++++++++++++++++++ Ryujinx.Tests/Cpu/CpuTestSimdMove.cs | 62 +-- Ryujinx.Tests/Cpu/Tester/Instructions.cs | 41 +- Ryujinx.Tests/Cpu/Tester/Pseudocode.cs | 18 + 13 files changed, 698 insertions(+), 120 deletions(-) create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdCmp.cs diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index fcaee38472..e78d0b5727 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -225,16 +225,16 @@ namespace ChocolArm64 SetA64("000111100x1xxxxxxxxx01xxxxx0xxxx", AInstEmit.Fccmp_S, typeof(AOpCodeSimdFcond)); SetA64("000111100x1xxxxxxxxx01xxxxx1xxxx", AInstEmit.Fccmpe_S, typeof(AOpCodeSimdFcond)); SetA64("010111100x1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmeq_S, typeof(AOpCodeSimdReg)); - SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmeq_V, typeof(AOpCodeSimdReg)); SetA64("010111101x100000110110xxxxxxxxxx", AInstEmit.Fcmeq_S, typeof(AOpCodeSimd)); + 
SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmeq_V, typeof(AOpCodeSimdReg)); SetA64("0>0011101<100000110110xxxxxxxxxx", AInstEmit.Fcmeq_V, typeof(AOpCodeSimd)); SetA64("011111100x1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmge_S, typeof(AOpCodeSimdReg)); - SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmge_V, typeof(AOpCodeSimdReg)); SetA64("011111101x100000110010xxxxxxxxxx", AInstEmit.Fcmge_S, typeof(AOpCodeSimd)); + SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmge_V, typeof(AOpCodeSimdReg)); SetA64("0>1011101<100000110010xxxxxxxxxx", AInstEmit.Fcmge_V, typeof(AOpCodeSimd)); SetA64("011111101x1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmgt_S, typeof(AOpCodeSimdReg)); - SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmgt_V, typeof(AOpCodeSimdReg)); SetA64("010111101x100000110010xxxxxxxxxx", AInstEmit.Fcmgt_S, typeof(AOpCodeSimd)); + SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmgt_V, typeof(AOpCodeSimdReg)); SetA64("0>0011101<100000110010xxxxxxxxxx", AInstEmit.Fcmgt_V, typeof(AOpCodeSimd)); SetA64("011111101x100000110110xxxxxxxxxx", AInstEmit.Fcmle_S, typeof(AOpCodeSimd)); SetA64("0>1011101<100000110110xxxxxxxxxx", AInstEmit.Fcmle_V, typeof(AOpCodeSimd)); diff --git a/ChocolArm64/AOptimizations.cs b/ChocolArm64/AOptimizations.cs index e8c1f7c444..800cf363d7 100644 --- a/ChocolArm64/AOptimizations.cs +++ b/ChocolArm64/AOptimizations.cs @@ -6,7 +6,15 @@ public static class AOptimizations public static bool GenerateCallStack = true; - public static bool UseSse2IfAvailable = true; + private static bool UseAllSseIfAvailable = true; - internal static bool UseSse2 = UseSse2IfAvailable && Sse2.IsSupported; + private static bool UseSseIfAvailable = true; + private static bool UseSse2IfAvailable = true; + private static bool UseSse41IfAvailable = true; + private static bool UseSse42IfAvailable = true; + + internal static bool UseSse = (UseAllSseIfAvailable && UseSseIfAvailable) && Sse.IsSupported; + internal static bool UseSse2 = 
(UseAllSseIfAvailable && UseSse2IfAvailable) && Sse2.IsSupported; + internal static bool UseSse41 = (UseAllSseIfAvailable && UseSse41IfAvailable) && Sse41.IsSupported; + internal static bool UseSse42 = (UseAllSseIfAvailable && UseSse42IfAvailable) && Sse42.IsSupported; } \ No newline at end of file diff --git a/ChocolArm64/Instruction/AInstEmitHash.cs b/ChocolArm64/Instruction/AInstEmitHash.cs index 94e03f6c1f..69bdbc480d 100644 --- a/ChocolArm64/Instruction/AInstEmitHash.cs +++ b/ChocolArm64/Instruction/AInstEmitHash.cs @@ -1,7 +1,9 @@ using ChocolArm64.Decoder; using ChocolArm64.State; using ChocolArm64.Translation; +using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; namespace ChocolArm64.Instruction { @@ -29,22 +31,62 @@ namespace ChocolArm64.Instruction public static void Crc32cb(AILEmitterCtx Context) { - EmitCrc32(Context, nameof(ASoftFallback.Crc32cb)); + if (AOptimizations.UseSse42) + { + EmitSse42Crc32(Context, typeof(uint), typeof(byte)); + } + else + { + EmitCrc32(Context, nameof(ASoftFallback.Crc32cb)); + } } public static void Crc32ch(AILEmitterCtx Context) { - EmitCrc32(Context, nameof(ASoftFallback.Crc32ch)); + if (AOptimizations.UseSse42) + { + EmitSse42Crc32(Context, typeof(uint), typeof(ushort)); + } + else + { + EmitCrc32(Context, nameof(ASoftFallback.Crc32ch)); + } } public static void Crc32cw(AILEmitterCtx Context) { - EmitCrc32(Context, nameof(ASoftFallback.Crc32cw)); + if (AOptimizations.UseSse42) + { + EmitSse42Crc32(Context, typeof(uint), typeof(uint)); + } + else + { + EmitCrc32(Context, nameof(ASoftFallback.Crc32cw)); + } } public static void Crc32cx(AILEmitterCtx Context) { - EmitCrc32(Context, nameof(ASoftFallback.Crc32cx)); + if (AOptimizations.UseSse42) + { + EmitSse42Crc32(Context, typeof(ulong), typeof(ulong)); + } + else + { + EmitCrc32(Context, nameof(ASoftFallback.Crc32cx)); + } + } + + private static void EmitSse42Crc32(AILEmitterCtx Context, Type TCrc, Type TData) + { + AOpCodeAluRs Op = 
(AOpCodeAluRs)Context.CurrOp; + + Context.EmitLdintzr(Op.Rn); + Context.EmitLdintzr(Op.Rm); + + Context.EmitCall(typeof(Sse42).GetMethod(nameof(Sse42.Crc32), new Type[] { TCrc, TData })); + + Context.EmitStintzr(Op.Rd); } private static void EmitCrc32(AILEmitterCtx Context, string Name) @@ -70,4 +112,4 @@ namespace ChocolArm64.Instruction Context.EmitStintzr(Op.Rd); } } -} \ No newline at end of file +} diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 8cd4654b73..8b6e234c1f 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -319,9 +319,9 @@ namespace ChocolArm64.Instruction public static void Fadd_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.AddScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.AddScalar)); } else { @@ -331,9 +331,9 @@ namespace ChocolArm64.Instruction public static void Fadd_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.Add)); + EmitSseOrSse2CallF(Context, nameof(Sse.Add)); } else { @@ -389,9 +389,9 @@ namespace ChocolArm64.Instruction public static void Fdiv_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.DivideScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.DivideScalar)); } else { @@ -401,9 +401,9 @@ namespace ChocolArm64.Instruction public static void Fdiv_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.Divide)); + EmitSseOrSse2CallF(Context, nameof(Sse.Divide)); } else { @@ -563,9 +563,9 @@ namespace ChocolArm64.Instruction public static void Fmul_S(AILEmitterCtx Context) { - if 
(AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.MultiplyScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.MultiplyScalar)); } else { @@ -580,9 +580,9 @@ namespace ChocolArm64.Instruction public static void Fmul_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.Multiply)); + EmitSseOrSse2CallF(Context, nameof(Sse.Multiply)); } else { @@ -1019,9 +1019,9 @@ namespace ChocolArm64.Instruction public static void Fsub_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.SubtractScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.SubtractScalar)); } else { @@ -1031,9 +1031,9 @@ namespace ChocolArm64.Instruction public static void Fsub_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.Subtract)); + EmitSseOrSse2CallF(Context, nameof(Sse.Subtract)); } else { diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs index ba8ac3e2a7..68a7ab8808 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs @@ -19,9 +19,20 @@ namespace ChocolArm64.Instruction public static void Cmeq_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg Op && Op.Size < 3) + if (Context.CurrOp is AOpCodeSimdReg Op) { - EmitSse2Call(Context, nameof(Sse2.CompareEqual)); + if (Op.Size < 3 && AOptimizations.UseSse2) + { + EmitSse2Call(Context, nameof(Sse2.CompareEqual)); + } + else if (Op.Size == 3 && AOptimizations.UseSse41) + { + EmitSse41Call(Context, nameof(Sse41.CompareEqual)); + } + else + { + EmitCmp(Context, OpCodes.Beq_S, Scalar: false); + } } else { @@ -46,9 +57,20 @@ namespace ChocolArm64.Instruction 
public static void Cmgt_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg Op && Op.Size < 3) + if (Context.CurrOp is AOpCodeSimdReg Op) { - EmitSse2Call(Context, nameof(Sse2.CompareGreaterThan)); + if (Op.Size < 3 && AOptimizations.UseSse2) + { + EmitSse2Call(Context, nameof(Sse2.CompareGreaterThan)); + } + else if (Op.Size == 3 && AOptimizations.UseSse42) + { + EmitSse42Call(Context, nameof(Sse42.CompareGreaterThan)); + } + else + { + EmitCmp(Context, OpCodes.Bgt_S, Scalar: false); + } } else { @@ -133,9 +155,10 @@ namespace ChocolArm64.Instruction public static void Fcmeq_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareEqualScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareEqualScalar)); } else { @@ -145,9 +168,10 @@ namespace ChocolArm64.Instruction public static void Fcmeq_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareEqual)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareEqual)); } else { @@ -157,9 +181,10 @@ namespace ChocolArm64.Instruction public static void Fcmge_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqualScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqualScalar)); } else { @@ -169,9 +194,10 @@ namespace ChocolArm64.Instruction public static void Fcmge_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && 
AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqual)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqual)); } else { @@ -181,9 +207,10 @@ namespace ChocolArm64.Instruction public static void Fcmgt_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanScalar)); } else { @@ -193,9 +220,10 @@ namespace ChocolArm64.Instruction public static void Fcmgt_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThan)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThan)); } else { diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 3caf2a3ed2..80c6aeb7a7 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection; +using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -34,11 +35,27 @@ namespace ChocolArm64.Instruction return (8 << (Op.Size + 1)) - Op.Imm; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void EmitSse2Call(AILEmitterCtx Context, string Name) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + EmitSseCall(Context, Name, typeof(Sse2)); + } - int SizeF = Op.Size & 1; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void EmitSse41Call(AILEmitterCtx Context, string Name) + { + EmitSseCall(Context, Name, typeof(Sse41)); + } + + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void EmitSse42Call(AILEmitterCtx Context, string Name) + { + EmitSseCall(Context, Name, typeof(Sse42)); + } + + private static void EmitSseCall(AILEmitterCtx Context, string Name, Type Type) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; void Ldvec(int Reg) { @@ -57,8 +74,6 @@ namespace ChocolArm64.Instruction Type BaseType = null; - Type[] Types; - switch (Op.Size) { case 0: BaseType = typeof(Vector128); break; @@ -71,15 +86,13 @@ namespace ChocolArm64.Instruction { Ldvec(BinOp.Rm); - Types = new Type[] { BaseType, BaseType }; + Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType, BaseType })); } else { - Types = new Type[] { BaseType }; + Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType })); } - Context.EmitCall(typeof(Sse2).GetMethod(Name, Types)); - switch (Op.Size) { case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSByteToSingle)); break; @@ -96,7 +109,7 @@ namespace ChocolArm64.Instruction } } - public static void EmitSse2CallF(AILEmitterCtx Context, string Name) + public static void EmitSseOrSse2CallF(AILEmitterCtx Context, string Name) { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; @@ -114,36 +127,31 @@ namespace ChocolArm64.Instruction Ldvec(Op.Rn); - Type BaseType = SizeF == 0 - ? 
typeof(Vector128) - : typeof(Vector128); + Type Type; + Type BaseType; - Type[] Types; + if (SizeF == 0) + { + Type = typeof(Sse); + BaseType = typeof(Vector128); + } + else /* if (SizeF == 1) */ + { + Type = typeof(Sse2); + BaseType = typeof(Vector128); + } if (Op is AOpCodeSimdReg BinOp) { Ldvec(BinOp.Rm); - Types = new Type[] { BaseType, BaseType }; + Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType, BaseType })); } else { - Types = new Type[] { BaseType }; + Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType })); } - MethodInfo MthdInfo; - - if (SizeF == 0) - { - MthdInfo = typeof(Sse).GetMethod(Name, Types); - } - else /* if (SizeF == 1) */ - { - MthdInfo = typeof(Sse2).GetMethod(Name, Types); - } - - Context.EmitCall(MthdInfo); - if (SizeF == 1) { AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorDoubleToSingle)); diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs index e2442ee492..2af50c6c89 100644 --- a/Ryujinx.Tests/Cpu/CpuTest.cs +++ b/Ryujinx.Tests/Cpu/CpuTest.cs @@ -113,6 +113,22 @@ namespace Ryujinx.Tests.Cpu return GetThreadState(); } + protected static Vector128 MakeVectorE0(double A0) + { + return Sse.StaticCast(Sse2.SetVector128(0, BitConverter.DoubleToInt64Bits(A0))); + } + + protected static Vector128 MakeVectorE0E1(double A0, double A1) + { + return Sse.StaticCast(Sse2.SetVector128(BitConverter.DoubleToInt64Bits(A1), + BitConverter.DoubleToInt64Bits(A0))); + } + + protected static Vector128 MakeVectorE1(double A1) + { + return Sse.StaticCast(Sse2.SetVector128(BitConverter.DoubleToInt64Bits(A1), 0)); + } + protected static double VectorExtractDouble(Vector128 Vector, byte Index) { long Value = Sse41.Extract(Sse.StaticCast(Vector), Index); @@ -120,24 +136,19 @@ namespace Ryujinx.Tests.Cpu return BitConverter.Int64BitsToDouble(Value); } - protected static Vector128 MakeVectorE0(double A) + protected static Vector128 MakeVectorE0(ulong A0) { - return Sse.StaticCast(Sse2.SetVector128(0, 
BitConverter.DoubleToInt64Bits(A))); + return Sse.StaticCast(Sse2.SetVector128(0, A0)); } - protected static Vector128 MakeVectorE0(ulong A) + protected static Vector128 MakeVectorE0E1(ulong A0, ulong A1) { - return Sse.StaticCast(Sse2.SetVector128(0, A)); + return Sse.StaticCast(Sse2.SetVector128(A1, A0)); } - protected static Vector128 MakeVectorE0E1(ulong A, ulong B) + protected static Vector128 MakeVectorE1(ulong A1) { - return Sse.StaticCast(Sse2.SetVector128(B, A)); - } - - protected static Vector128 MakeVectorE1(ulong B) - { - return Sse.StaticCast(Sse2.SetVector128(B, 0)); + return Sse.StaticCast(Sse2.SetVector128(A1, 0)); } protected static ulong GetVectorE0(Vector128 Vector) diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index 6cc823042a..90461728a9 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -628,6 +628,47 @@ namespace Ryujinx.Tests.Cpu }); } + [Test, Description("CNT ., .")] + public void Cnt_V_8B([ValueSource("_8B_")] [Random(1)] ulong A) + { + uint Opcode = 0x0E205820; // CNT V0.8B, V1.8B + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.V(1, new Bits(A)); + SimdFp.Cnt_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + }); + } + + [Test, Pairwise, Description("CNT ., .")] + public void Cnt_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, + [ValueSource("_8B_")] [Random(1)] ulong A1) + { + uint Opcode = 0x4E205820; // CNT V0.16B, V1.16B + Bits Op = new Bits(Opcode); + + Vector128 V1 = MakeVectorE0E1(A0, A1); + AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + + AArch64.Vpart(1, 0, new Bits(A0)); + AArch64.Vpart(1, 1, new Bits(A1)); + 
SimdFp.Cnt_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Description("NEG , ")] public void Neg_S_D([ValueSource("_1D_")] [Random(1)] ulong A) { diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs index d04eca7b55..98be2fc5be 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs @@ -176,10 +176,13 @@ namespace Ryujinx.Tests.Cpu { AThreadState ThreadState = SingleOpcode(0x4EA1D802, V0: Sse.SetAllVector128(A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)0), Is.EqualTo(1 / A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)1), Is.EqualTo(1 / A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)2), Is.EqualTo(1 / A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)3), Is.EqualTo(1 / A)); + Assert.Multiple(() => + { + Assert.That(Sse41.Extract(ThreadState.V2, (byte)0), Is.EqualTo(1 / A)); + Assert.That(Sse41.Extract(ThreadState.V2, (byte)1), Is.EqualTo(1 / A)); + Assert.That(Sse41.Extract(ThreadState.V2, (byte)2), Is.EqualTo(1 / A)); + Assert.That(Sse41.Extract(ThreadState.V2, (byte)3), Is.EqualTo(1 / A)); + }); } [Test, Description("FRECPS D0, D1, D2")] @@ -199,10 +202,13 @@ namespace Ryujinx.Tests.Cpu V2: Sse.SetAllVector128(A), V0: Sse.SetAllVector128(B)); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(2 - (A * B))); + Assert.Multiple(() => + { + Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(2 - (A * B))); + Assert.That(Sse41.Extract(ThreadState.V4, 
(byte)1), Is.EqualTo(2 - (A * B))); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(2 - (A * B))); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(2 - (A * B))); + }); } [TestCase(0x3FE66666u, false, 0x40000000u)] diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCmp.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCmp.cs new file mode 100644 index 0000000000..41f5113d6e --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCmp.cs @@ -0,0 +1,375 @@ +using ChocolArm64.State; + +using NUnit.Framework; + +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace Ryujinx.Tests.Cpu +{ + public class CpuTestSimdCmp : CpuTest + { +#region "ValueSource" + private static float[] _floats_() + { + return new float[] { float.NegativeInfinity, float.MinValue, -1f, -0f, + +0f, +1f, float.MaxValue, float.PositiveInfinity }; + } + + private static double[] _doubles_() + { + return new double[] { double.NegativeInfinity, double.MinValue, -1d, -0d, + +0d, +1d, double.MaxValue, double.PositiveInfinity }; + } +#endregion + + [Test, Description("FCMEQ D0, D1, D2 | FCMGE D0, D1, D2 | FCMGT D0, D1, D2")] + public void Fcmeq_Fcmge_Fcmgt_Reg_S_D([ValueSource("_doubles_")] [Random(8)] double A, + [ValueSource("_doubles_")] [Random(8)] double B, + [Values(0u, 1u, 3u)] uint EU) // EQ, GE, GT + { + uint Opcode = 0x5E62E420 | ((EU & 1) << 29) | ((EU >> 1) << 23); + Vector128 V0 = Sse.StaticCast(Sse2.SetAllVector128(TestContext.CurrentContext.Random.NextDouble())); + Vector128 V1 = Sse.StaticCast(Sse2.SetScalarVector128(A)); + Vector128 V2 = Sse.StaticCast(Sse2.SetScalarVector128(B)); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + byte[] Exp = default(byte[]); + byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + + switch (EU) + { + case 0: Exp = (A == B ? Ones : Zeros); break; + case 1: Exp = (A >= B ? 
Ones : Zeros); break; + case 3: Exp = (A > B ? Ones : Zeros); break; + } + + Assert.Multiple(() => + { + Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); + Assert.That(VectorExtractDouble(ThreadState.V0, (byte)1), Is.Zero); + }); + } + + [Test, Description("FCMEQ S0, S1, S2 | FCMGE S0, S1, S2 | FCMGT S0, S1, S2")] + public void Fcmeq_Fcmge_Fcmgt_Reg_S_S([ValueSource("_floats_")] [Random(8)] float A, + [ValueSource("_floats_")] [Random(8)] float B, + [Values(0u, 1u, 3u)] uint EU) // EQ, GE, GT + { + uint Opcode = 0x5E22E420 | ((EU & 1) << 29) | ((EU >> 1) << 23); + Vector128 V0 = Sse.SetAllVector128(TestContext.CurrentContext.Random.NextFloat()); + Vector128 V1 = Sse.SetScalarVector128(A); + Vector128 V2 = Sse.SetScalarVector128(B); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + byte[] Exp = default(byte[]); + byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; + byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; + + switch (EU) + { + case 0: Exp = (A == B ? Ones : Zeros); break; + case 1: Exp = (A >= B ? Ones : Zeros); break; + case 3: Exp = (A > B ? 
Ones : Zeros); break; + } + + Assert.Multiple(() => + { + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); + Assert.That(Sse41.Extract(ThreadState.V0, (byte)1), Is.Zero); + Assert.That(Sse41.Extract(ThreadState.V0, (byte)2), Is.Zero); + Assert.That(Sse41.Extract(ThreadState.V0, (byte)3), Is.Zero); + }); + } + + [Test, Description("FCMEQ V0.2D, V1.2D, V2.2D | FCMGE V0.2D, V1.2D, V2.2D | FCMGT V0.2D, V1.2D, V2.2D")] + public void Fcmeq_Fcmge_Fcmgt_Reg_V_2D([ValueSource("_doubles_")] [Random(8)] double A, + [ValueSource("_doubles_")] [Random(8)] double B, + [Values(0u, 1u, 3u)] uint EU) // EQ, GE, GT + { + uint Opcode = 0x4E62E420 | ((EU & 1) << 29) | ((EU >> 1) << 23); + Vector128 V1 = Sse.StaticCast(Sse2.SetAllVector128(A)); + Vector128 V2 = Sse.StaticCast(Sse2.SetAllVector128(B)); + + AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + + byte[] Exp = default(byte[]); + byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + + switch (EU) + { + case 0: Exp = (A == B ? Ones : Zeros); break; + case 1: Exp = (A >= B ? Ones : Zeros); break; + case 3: Exp = (A > B ? 
Ones : Zeros); break; + } + + Assert.Multiple(() => + { + Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); + Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); + }); + } + + [Test, Description("FCMEQ V0.2S, V1.2S, V2.2S | FCMGE V0.2S, V1.2S, V2.2S | FCMGT V0.2S, V1.2S, V2.2S")] + public void Fcmeq_Fcmge_Fcmgt_Reg_V_2S([ValueSource("_floats_")] [Random(8)] float A, + [ValueSource("_floats_")] [Random(8)] float B, + [Values(0u, 1u, 3u)] uint EU) // EQ, GE, GT + { + uint Opcode = 0x0E22E420 | ((EU & 1) << 29) | ((EU >> 1) << 23); + Vector128 V0 = Sse.SetAllVector128(TestContext.CurrentContext.Random.NextFloat()); + Vector128 V1 = Sse.SetVector128(0, 0, A, A); + Vector128 V2 = Sse.SetVector128(0, 0, B, B); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + byte[] Exp = default(byte[]); + byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; + byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; + + switch (EU) + { + case 0: Exp = (A == B ? Ones : Zeros); break; + case 1: Exp = (A >= B ? Ones : Zeros); break; + case 3: Exp = (A > B ? 
Ones : Zeros); break; + } + + Assert.Multiple(() => + { + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); + Assert.That(Sse41.Extract(ThreadState.V0, (byte)2), Is.Zero); + Assert.That(Sse41.Extract(ThreadState.V0, (byte)3), Is.Zero); + }); + } + + [Test, Description("FCMEQ V0.4S, V1.4S, V2.4S | FCMGE V0.4S, V1.4S, V2.4S | FCMGT V0.4S, V1.4S, V2.4S")] + public void Fcmeq_Fcmge_Fcmgt_Reg_V_4S([ValueSource("_floats_")] [Random(8)] float A, + [ValueSource("_floats_")] [Random(8)] float B, + [Values(0u, 1u, 3u)] uint EU) // EQ, GE, GT + { + uint Opcode = 0x4E22E420 | ((EU & 1) << 29) | ((EU >> 1) << 23); + Vector128 V1 = Sse.SetAllVector128(A); + Vector128 V2 = Sse.SetAllVector128(B); + + AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + + byte[] Exp = default(byte[]); + byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; + byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; + + switch (EU) + { + case 0: Exp = (A == B ? Ones : Zeros); break; + case 1: Exp = (A >= B ? Ones : Zeros); break; + case 3: Exp = (A > B ? 
Ones : Zeros); break; + } + + Assert.Multiple(() => + { + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)2)), Is.EquivalentTo(Exp)); + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)3)), Is.EquivalentTo(Exp)); + }); + } + + [Test, Description("FCMGT D0, D1, #0.0 | FCMGE D0, D1, #0.0 | FCMEQ D0, D1, #0.0 | FCMLE D0, D1, #0.0 | FCMLT D0, D1, #0.0")] + public void Fcmgt_Fcmge_Fcmeq_Fcmle_Fcmlt_Zero_S_D([ValueSource("_doubles_")] [Random(8)] double A, + [Values(0u, 1u, 2u, 3u)] uint opU, // GT, GE, EQ, LE + [Values(0u, 1u)] uint bit13) // "LT" + { + uint Opcode = 0x5EE0C820 | (((opU & 1) & ~bit13) << 29) | (bit13 << 13) | (((opU >> 1) & ~bit13) << 12); + Vector128 V0 = Sse.StaticCast(Sse2.SetAllVector128(TestContext.CurrentContext.Random.NextDouble())); + Vector128 V1 = Sse.StaticCast(Sse2.SetScalarVector128(A)); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + double Zero = +0d; + byte[] Exp = default(byte[]); + byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + + if (bit13 == 0) + { + switch (opU) + { + case 0: Exp = (A > Zero ? Ones : Zeros); break; + case 1: Exp = (A >= Zero ? Ones : Zeros); break; + case 2: Exp = (A == Zero ? Ones : Zeros); break; + case 3: Exp = (Zero >= A ? Ones : Zeros); break; + } + } + else + { + Exp = (Zero > A ? 
Ones : Zeros); + } + + Assert.Multiple(() => + { + Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); + Assert.That(VectorExtractDouble(ThreadState.V0, (byte)1), Is.Zero); + }); + } + + /* Scalar single-precision compare-with-zero test for the forms listed in Description: lane 0 of V0 must hold the 4-byte all-ones/all-zeros mask chosen by the matching C# comparison of A with Zero, and lanes 1-3 must read as zero even though V0 starts filled with a random float. */ [Test, Description("FCMGT S0, S1, #0.0 | FCMGE S0, S1, #0.0 | FCMEQ S0, S1, #0.0 | FCMLE S0, S1, #0.0 | FCMLT S0, S1, #0.0")] + public void Fcmgt_Fcmge_Fcmeq_Fcmle_Fcmlt_Zero_S_S([ValueSource("_floats_")] [Random(8)] float A, + [Values(0u, 1u, 2u, 3u)] uint opU, // GT, GE, EQ, LE + [Values(0u, 1u)] uint bit13) // "LT" + { + /* opU bit 0 is placed at opcode bit 29 and opU bit 1 at bit 12, both forced to 0 when bit13 is 1; bit13 itself is placed at bit 13. */ uint Opcode = 0x5EA0C820 | (((opU & 1) & ~bit13) << 29) | (bit13 << 13) | (((opU >> 1) & ~bit13) << 12); + Vector128 V0 = Sse.SetAllVector128(TestContext.CurrentContext.Random.NextFloat()); + Vector128 V1 = Sse.SetScalarVector128(A); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + float Zero = +0f; + byte[] Exp = default(byte[]); + byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; + byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; + + if (bit13 == 0) + { + switch (opU) + { + case 0: Exp = (A > Zero ? Ones : Zeros); break; + case 1: Exp = (A >= Zero ? Ones : Zeros); break; + case 2: Exp = (A == Zero ? Ones : Zeros); break; + case 3: Exp = (Zero >= A ? Ones : Zeros); break; + } + } + else + { + Exp = (Zero > A ? 
Ones : Zeros); + } + + Assert.Multiple(() => + { + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); + Assert.That(Sse41.Extract(ThreadState.V0, (byte)1), Is.Zero); + Assert.That(Sse41.Extract(ThreadState.V0, (byte)2), Is.Zero); + Assert.That(Sse41.Extract(ThreadState.V0, (byte)3), Is.Zero); + }); + } + + /* Vector 2D compare-with-zero test: V1 is built from A with Sse2.SetAllVector128, and both double lanes of V0 must equal the 8-byte all-ones/all-zeros mask chosen by comparing A with Zero; opU and bit13 are merged into the base opcode 0x4EE0C820 to pick the comparison. */ [Test, Description("FCMGT V0.2D, V1.2D, #0.0 | FCMGE V0.2D, V1.2D, #0.0 | FCMEQ V0.2D, V1.2D, #0.0 | FCMLE V0.2D, V1.2D, #0.0 | FCMLT V0.2D, V1.2D, #0.0")] + public void Fcmgt_Fcmge_Fcmeq_Fcmle_Fcmlt_Zero_V_2D([ValueSource("_doubles_")] [Random(8)] double A, + [Values(0u, 1u, 2u, 3u)] uint opU, // GT, GE, EQ, LE + [Values(0u, 1u)] uint bit13) // "LT" + { + uint Opcode = 0x4EE0C820 | (((opU & 1) & ~bit13) << 29) | (bit13 << 13) | (((opU >> 1) & ~bit13) << 12); + Vector128 V1 = Sse.StaticCast(Sse2.SetAllVector128(A)); + + AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + + double Zero = +0d; + byte[] Exp = default(byte[]); + byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + + if (bit13 == 0) + { + switch (opU) + { + case 0: Exp = (A > Zero ? Ones : Zeros); break; + case 1: Exp = (A >= Zero ? Ones : Zeros); break; + case 2: Exp = (A == Zero ? Ones : Zeros); break; + case 3: Exp = (Zero >= A ? Ones : Zeros); break; + } + } + else + { + Exp = (Zero > A ? 
Ones : Zeros); + } + + Assert.Multiple(() => + { + Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); + Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); + }); + } + + /* Vector 2S compare-with-zero test: V0 starts filled with a random float and V1 is built with Sse.SetVector128(0, 0, A, A); lanes 0 and 1 of V0 must equal the 4-byte mask chosen by comparing A with Zero, and lanes 2 and 3 must read as zero. */ [Test, Description("FCMGT V0.2S, V1.2S, #0.0 | FCMGE V0.2S, V1.2S, #0.0 | FCMEQ V0.2S, V1.2S, #0.0 | FCMLE V0.2S, V1.2S, #0.0 | FCMLT V0.2S, V1.2S, #0.0")] + public void Fcmgt_Fcmge_Fcmeq_Fcmle_Fcmlt_Zero_V_2S([ValueSource("_floats_")] [Random(8)] float A, + [Values(0u, 1u, 2u, 3u)] uint opU, // GT, GE, EQ, LE + [Values(0u, 1u)] uint bit13) // "LT" + { + uint Opcode = 0x0EA0C820 | (((opU & 1) & ~bit13) << 29) | (bit13 << 13) | (((opU >> 1) & ~bit13) << 12); + Vector128 V0 = Sse.SetAllVector128(TestContext.CurrentContext.Random.NextFloat()); + Vector128 V1 = Sse.SetVector128(0, 0, A, A); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + float Zero = +0f; + byte[] Exp = default(byte[]); + byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; + byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; + + if (bit13 == 0) + { + switch (opU) + { + case 0: Exp = (A > Zero ? Ones : Zeros); break; + case 1: Exp = (A >= Zero ? Ones : Zeros); break; + case 2: Exp = (A == Zero ? Ones : Zeros); break; + case 3: Exp = (Zero >= A ? Ones : Zeros); break; + } + } + else + { + Exp = (Zero > A ? 
Ones : Zeros); + } + + Assert.Multiple(() => + { + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); + Assert.That(Sse41.Extract(ThreadState.V0, (byte)2), Is.Zero); + Assert.That(Sse41.Extract(ThreadState.V0, (byte)3), Is.Zero); + }); + } + + /* Vector 4S compare-with-zero test: V1 is built from A with Sse.SetAllVector128, and all four float lanes of V0 must equal the 4-byte all-ones/all-zeros mask chosen by comparing A with Zero; opU and bit13 are merged into the base opcode 0x4EA0C820 to pick the comparison. */ [Test, Description("FCMGT V0.4S, V1.4S, #0.0 | FCMGE V0.4S, V1.4S, #0.0 | FCMEQ V0.4S, V1.4S, #0.0 | FCMLE V0.4S, V1.4S, #0.0 | FCMLT V0.4S, V1.4S, #0.0")] + public void Fcmgt_Fcmge_Fcmeq_Fcmle_Fcmlt_Zero_V_4S([ValueSource("_floats_")] [Random(8)] float A, + [Values(0u, 1u, 2u, 3u)] uint opU, // GT, GE, EQ, LE + [Values(0u, 1u)] uint bit13) // "LT" + { + uint Opcode = 0x4EA0C820 | (((opU & 1) & ~bit13) << 29) | (bit13 << 13) | (((opU >> 1) & ~bit13) << 12); + Vector128 V1 = Sse.SetAllVector128(A); + + AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + + float Zero = +0f; + byte[] Exp = default(byte[]); + byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; + byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; + + if (bit13 == 0) + { + switch (opU) + { + case 0: Exp = (A > Zero ? Ones : Zeros); break; + case 1: Exp = (A >= Zero ? Ones : Zeros); break; + case 2: Exp = (A == Zero ? Ones : Zeros); break; + case 3: Exp = (Zero >= A ? Ones : Zeros); break; + } + } + else + { + Exp = (Zero > A ? 
Ones : Zeros); + } + + Assert.Multiple(() => + { + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)2)), Is.EquivalentTo(Exp)); + Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)3)), Is.EquivalentTo(Exp)); + }); + } + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs index 498488206e..055e08689c 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs @@ -19,12 +19,13 @@ namespace Ryujinx.Tests.Cpu AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0); - - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A0)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B0)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), Is.EqualTo(A2)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B2)); + Assert.Multiple(() => + { + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A0)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B0)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), Is.EqualTo(A2)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B2)); + }); } [Test, Description("TRN1 V0.8B, V1.8B, V2.8B")] @@ -39,14 +40,17 @@ namespace Ryujinx.Tests.Cpu AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A0)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B0)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), 
Is.EqualTo(A2)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B2)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)4), Is.EqualTo(A4)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)5), Is.EqualTo(B4)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)6), Is.EqualTo(A6)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)7), Is.EqualTo(B6)); + Assert.Multiple(() => + { + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A0)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B0)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), Is.EqualTo(A2)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B2)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)4), Is.EqualTo(A4)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)5), Is.EqualTo(B4)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)6), Is.EqualTo(A6)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)7), Is.EqualTo(B6)); + }); } [Test, Description("TRN2 V0.4S, V1.4S, V2.4S")] @@ -59,10 +63,13 @@ namespace Ryujinx.Tests.Cpu AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A1)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B1)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), Is.EqualTo(A3)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B3)); + Assert.Multiple(() => + { + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A1)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B1)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), Is.EqualTo(A3)); + 
Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B3)); + }); } [Test, Description("TRN2 V0.8B, V1.8B, V2.8B")] @@ -77,14 +84,17 @@ namespace Ryujinx.Tests.Cpu AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A1)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B1)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), Is.EqualTo(A3)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B3)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)4), Is.EqualTo(A5)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)5), Is.EqualTo(B5)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)6), Is.EqualTo(A7)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)7), Is.EqualTo(B7)); + Assert.Multiple(() => + { + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A1)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B1)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), Is.EqualTo(A3)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B3)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)4), Is.EqualTo(A5)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)5), Is.EqualTo(B5)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)6), Is.EqualTo(A7)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)7), Is.EqualTo(B7)); + }); } [TestCase(0u, 0u, 0x2313221221112010ul, 0x0000000000000000ul)] diff --git a/Ryujinx.Tests/Cpu/Tester/Instructions.cs b/Ryujinx.Tests/Cpu/Tester/Instructions.cs index aa62ddccd7..a4e04e9606 100644 --- a/Ryujinx.Tests/Cpu/Tester/Instructions.cs +++ b/Ryujinx.Tests/Cpu/Tester/Instructions.cs @@ -1826,7 +1826,7 @@ 
namespace Ryujinx.Tests.Cpu.Tester // addp_advsimd_pair.html public static void Addp_S(Bits size, Bits Rn, Bits Rd) { - /* Decode Scalar */ + /* Decode */ int d = (int)UInt(Rd); int n = (int)UInt(Rn); @@ -1875,7 +1875,7 @@ namespace Ryujinx.Tests.Cpu.Tester { const bool U = false; - /* Decode */ + /* Decode Vector */ int d = (int)UInt(Rd); int n = (int)UInt(Rn); @@ -1917,7 +1917,7 @@ namespace Ryujinx.Tests.Cpu.Tester { const bool U = true; - /* Decode */ + /* Decode Vector */ int d = (int)UInt(Rd); int n = (int)UInt(Rn); @@ -2654,6 +2654,37 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // cnt_advsimd.html + /* Reference model for Cnt_V: reads V[n] as datasize bits (128 when Q is true, otherwise 64), and for every 8-bit element writes BitCount of that element into the same element of the result, which is then stored to V[d]. The size argument is not read here because its reserved-value check is commented out. */ public static void Cnt_V(bool Q, Bits size, Bits Rn, Bits Rd) + { + /* Decode Vector */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + + /* if size != '00' then ReservedValue(); */ + + int esize = 8; + int datasize = (Q ? 128 : 64); + int elements = datasize / 8; + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand = V(datasize, n); + + BigInteger count; + + for (int e = 0; e <= elements - 1; e++) + { + count = (BigInteger)BitCount(Elem(operand, e, esize)); + + /* NOTE(review): SubBigInteger(esize - 1, 0) presumably keeps bits esize-1..0 of count so it fits the element; confirm against its definition. */ Elem(result, e, esize, count.SubBigInteger(esize - 1, 0)); + } + + V(d, result); + } + // neg_advsimd.html#NEG_asisdmisc_R public static void Neg_S(Bits size, Bits Rn, Bits Rd) { @@ -2745,7 +2776,7 @@ namespace Ryujinx.Tests.Cpu.Tester // not_advsimd.html public static void Not_V(bool Q, Bits Rn, Bits Rd) { - /* Decode */ + /* Decode Vector */ int d = (int)UInt(Rd); int n = (int)UInt(Rn); @@ -3095,7 +3126,7 @@ namespace Ryujinx.Tests.Cpu.Tester // addp_advsimd_vec.html public static void Addp_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) { - /* Decode Vector */ + /* Decode */ int d = (int)UInt(Rd); int n = (int)UInt(Rn); int m = (int)UInt(Rm); diff --git a/Ryujinx.Tests/Cpu/Tester/Pseudocode.cs b/Ryujinx.Tests/Cpu/Tester/Pseudocode.cs index 363e2de943..3a877fb1a2 100644 --- a/Ryujinx.Tests/Cpu/Tester/Pseudocode.cs +++ 
b/Ryujinx.Tests/Cpu/Tester/Pseudocode.cs @@ -586,6 +586,24 @@ namespace Ryujinx.Tests.Cpu.Tester return (x >= 0 ? x : -x); } + // shared_pseudocode.html#impl-shared.BitCount.1 + /* Returns how many of the x.Count bits of x are set, i.e. the number of indices i for which x[i] is true; yields 0 when x.Count is 0. */ public static int BitCount(Bits x) + { + int N = x.Count; + + int result = 0; + + for (int i = 0; i <= N - 1; i++) + { + if (x[i]) + { + result = result + 1; + } + } + + return result; + } + // shared_pseudocode.html#impl-shared.CountLeadingSignBits.1 public static int CountLeadingSignBits(Bits x) {