From b956bbc32c7f9fdffebfd9a9416e8e0a2a588abd Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sun, 28 Oct 2018 23:27:50 +0100 Subject: [PATCH] Add SHA1C, SHA1H, SHA1M, SHA1P, SHA1SU0, SHA1SU1 and Isb instructions; add 6 Tests (closed box). (#483) * Update AOpCodeTable.cs * Update AInstEmitSystem.cs * Update AInstEmitSimdHash.cs * Update ASoftFallback.cs * Update CpuTestSimdReg.cs * Update CpuTestSimd.cs --- ChocolArm64/AOpCodeTable.cs | 7 + ChocolArm64/Instruction/AInstEmitSimdHash.cs | 83 ++++- ChocolArm64/Instruction/AInstEmitSystem.cs | 7 +- ChocolArm64/Instruction/ASoftFallback.cs | 304 ++++++++++++++----- Ryujinx.Tests/Cpu/CpuTestSimd.cs | 50 ++- Ryujinx.Tests/Cpu/CpuTestSimdReg.cs | 81 ++--- 6 files changed, 400 insertions(+), 132 deletions(-) diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index 44493298bc..cbdff47fd8 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -88,6 +88,7 @@ namespace ChocolArm64 SetA64("00010011100xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Extr, typeof(AOpCodeAluRs)); SetA64("10010011110xxxxxxxxxxxxxxxxxxxxx", AInstEmit.Extr, typeof(AOpCodeAluRs)); SetA64("11010101000000110010xxxxxxx11111", AInstEmit.Hint, typeof(AOpCodeSystem)); + SetA64("11010101000000110011xxxx11011111", AInstEmit.Isb, typeof(AOpCodeSystem)); SetA64("xx001000110xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Ldar, typeof(AOpCodeMemEx)); SetA64("1x001000011xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Ldaxp, typeof(AOpCodeMemEx)); SetA64("xx001000010xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Ldaxr, typeof(AOpCodeMemEx)); @@ -386,6 +387,12 @@ namespace ChocolArm64 SetA64("x0011110xx100010000000xxxxxxxxxx", AInstEmit.Scvtf_Gp, typeof(AOpCodeSimdCvt)); SetA64("010111100x100001110110xxxxxxxxxx", AInstEmit.Scvtf_S, typeof(AOpCodeSimd)); SetA64("0x0011100x100001110110xxxxxxxxxx", AInstEmit.Scvtf_V, typeof(AOpCodeSimd)); + SetA64("01011110000xxxxx000000xxxxxxxxxx", AInstEmit.Sha1c_V, typeof(AOpCodeSimdReg)); + SetA64("0101111000101000000010xxxxxxxxxx", AInstEmit.Sha1h_V, typeof(AOpCodeSimd)); + SetA64("01011110000xxxxx001000xxxxxxxxxx", AInstEmit.Sha1m_V, typeof(AOpCodeSimdReg)); + SetA64("01011110000xxxxx000100xxxxxxxxxx", AInstEmit.Sha1p_V, typeof(AOpCodeSimdReg)); + SetA64("01011110000xxxxx001100xxxxxxxxxx", AInstEmit.Sha1su0_V, typeof(AOpCodeSimdReg)); + SetA64("0101111000101000000110xxxxxxxxxx", AInstEmit.Sha1su1_V, typeof(AOpCodeSimd)); SetA64("01011110000xxxxx010000xxxxxxxxxx", AInstEmit.Sha256h_V, typeof(AOpCodeSimdReg)); SetA64("01011110000xxxxx010100xxxxxxxxxx", AInstEmit.Sha256h2_V, typeof(AOpCodeSimdReg)); SetA64("0101111000101000001010xxxxxxxxxx", AInstEmit.Sha256su0_V, typeof(AOpCodeSimd)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdHash.cs b/ChocolArm64/Instruction/AInstEmitSimdHash.cs index 6b642acb58..5a59e779f3 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHash.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHash.cs @@ -1,10 +1,89 @@ using ChocolArm64.Decoder; using ChocolArm64.Translation; +using static ChocolArm64.Instruction.AInstEmitSimdHelper; + namespace ChocolArm64.Instruction { static partial class AInstEmit { +#region "Sha1" + public static void Sha1c_V(AILEmitterCtx Context) + { + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + Context.EmitLdvec(Op.Rd); + EmitVectorExtractZx(Context, Op.Rn, 0, 2); + Context.EmitLdvec(Op.Rm); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.HashChoose)); + + Context.EmitStvec(Op.Rd); + } + + public static void Sha1h_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + EmitVectorExtractZx(Context, Op.Rn, 0, 2); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.FixedRotate)); + + EmitScalarSet(Context, Op.Rd, 2); + } + + public static void Sha1m_V(AILEmitterCtx Context) + { + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + Context.EmitLdvec(Op.Rd); + EmitVectorExtractZx(Context, Op.Rn, 0, 2); + Context.EmitLdvec(Op.Rm); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.HashMajority)); + + Context.EmitStvec(Op.Rd); + } + + public static void Sha1p_V(AILEmitterCtx Context) + { + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + Context.EmitLdvec(Op.Rd); + EmitVectorExtractZx(Context, Op.Rn, 0, 2); + Context.EmitLdvec(Op.Rm); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.HashParity)); + + Context.EmitStvec(Op.Rd); + } + + public static void Sha1su0_V(AILEmitterCtx Context) + { + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + Context.EmitLdvec(Op.Rd); + Context.EmitLdvec(Op.Rn); + Context.EmitLdvec(Op.Rm); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Sha1SchedulePart1)); + + Context.EmitStvec(Op.Rd); + } + + public static void Sha1su1_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + Context.EmitLdvec(Op.Rd); + Context.EmitLdvec(Op.Rn); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Sha1SchedulePart2)); + + Context.EmitStvec(Op.Rd); + } +#endregion + #region "Sha256" public static void Sha256h_V(AILEmitterCtx Context) { @@ -39,7 +118,7 @@ namespace ChocolArm64.Instruction Context.EmitLdvec(Op.Rd); Context.EmitLdvec(Op.Rn); - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SchedulePart1)); + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Sha256SchedulePart1)); Context.EmitStvec(Op.Rd); } @@ -52,7 +131,7 @@ namespace ChocolArm64.Instruction Context.EmitLdvec(Op.Rn); Context.EmitLdvec(Op.Rm); - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SchedulePart2)); + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Sha256SchedulePart2)); Context.EmitStvec(Op.Rd); } diff --git a/ChocolArm64/Instruction/AInstEmitSystem.cs b/ChocolArm64/Instruction/AInstEmitSystem.cs index 1c5d02634f..a365398ff7 100644 --- a/ChocolArm64/Instruction/AInstEmitSystem.cs +++ b/ChocolArm64/Instruction/AInstEmitSystem.cs @@ -14,6 +14,11 @@ namespace ChocolArm64.Instruction //Execute as no-op. } + public static void Isb(AILEmitterCtx Context) + { + //Execute as no-op. + } + public static void Mrs(AILEmitterCtx Context) { AOpCodeSystem Op = (AOpCodeSystem)Context.CurrOp; @@ -130,4 +135,4 @@ namespace ChocolArm64.Instruction return Id; } } -} \ No newline at end of file +} diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs index b69e2c75e3..d643fb6f76 100644 --- a/ChocolArm64/Instruction/ASoftFallback.cs +++ b/ChocolArm64/Instruction/ASoftFallback.cs @@ -103,20 +103,20 @@ namespace ChocolArm64.Instruction #endregion #region "Saturating" - public static long SignedSrcSignedDstSatQ(long op, int Size, AThreadState State) + public static long SignedSrcSignedDstSatQ(long Op, int Size, AThreadState State) { int ESize = 8 << Size; long TMaxValue = (1L << (ESize - 1)) - 1L; long TMinValue = -(1L << (ESize - 1)); - if (op > TMaxValue) + if (Op > TMaxValue) { State.SetFpsrFlag(FPSR.QC); return TMaxValue; } - else if (op < TMinValue) + else if (Op < TMinValue) { State.SetFpsrFlag(FPSR.QC); @@ -124,24 +124,24 @@ namespace ChocolArm64.Instruction } else { - return op; + return Op; } } - public static ulong SignedSrcUnsignedDstSatQ(long op, int Size, AThreadState State) + public static ulong SignedSrcUnsignedDstSatQ(long Op, int Size, AThreadState State) { int ESize = 8 << Size; ulong TMaxValue = (1UL << ESize) - 1UL; ulong TMinValue = 0UL; - if (op > (long)TMaxValue) + if (Op > (long)TMaxValue) { State.SetFpsrFlag(FPSR.QC); return TMaxValue; } - else if (op < (long)TMinValue) + else if (Op < (long)TMinValue) { State.SetFpsrFlag(FPSR.QC); @@ -149,17 +149,17 @@ namespace ChocolArm64.Instruction } else { - return (ulong)op; + return (ulong)Op; } } - public static long UnsignedSrcSignedDstSatQ(ulong op, int Size, AThreadState State) + public static long UnsignedSrcSignedDstSatQ(ulong Op, int Size, AThreadState State) { int ESize = 8 << Size; long TMaxValue = (1L << (ESize - 1)) - 1L; - if (op > (ulong)TMaxValue) + if (Op > (ulong)TMaxValue) { State.SetFpsrFlag(FPSR.QC); @@ -167,17 +167,17 @@ namespace ChocolArm64.Instruction } else { - return (long)op; + return (long)Op; } } - public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int Size, AThreadState State) + public static ulong UnsignedSrcUnsignedDstSatQ(ulong Op, int Size, AThreadState State) { int ESize = 8 << Size; ulong TMaxValue = (1UL << ESize) - 1UL; - if (op > TMaxValue) + if (Op > TMaxValue) { State.SetFpsrFlag(FPSR.QC); @@ -185,13 +185,13 @@ namespace ChocolArm64.Instruction } else { - return op; + return Op; } } - public static long UnarySignedSatQAbsOrNeg(long op, AThreadState State) + public static long UnarySignedSatQAbsOrNeg(long Op, AThreadState State) { - if (op == long.MinValue) + if (Op == long.MinValue) { State.SetFpsrFlag(FPSR.QC); @@ -199,19 +199,19 @@ namespace ChocolArm64.Instruction } else { - return op; + return Op; } } - public static long BinarySignedSatQAdd(long op1, long op2, AThreadState State) + public static long BinarySignedSatQAdd(long Op1, long Op2, AThreadState State) { - long Add = op1 + op2; + long Add = Op1 + Op2; - if ((~(op1 ^ op2) & (op1 ^ Add)) < 0L) + if ((~(Op1 ^ Op2) & (Op1 ^ Add)) < 0L) { State.SetFpsrFlag(FPSR.QC); - if (op1 < 0L) + if (Op1 < 0L) { return long.MinValue; } @@ -226,11 +226,11 @@ namespace ChocolArm64.Instruction } } - public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2, AThreadState State) + public static ulong BinaryUnsignedSatQAdd(ulong Op1, ulong Op2, AThreadState State) { - ulong Add = op1 + op2; + ulong Add = Op1 + Op2; - if ((Add < op1) && (Add < op2)) + if ((Add < Op1) && (Add < Op2)) { State.SetFpsrFlag(FPSR.QC); @@ -242,15 +242,15 @@ namespace ChocolArm64.Instruction } } - public static long BinarySignedSatQSub(long op1, long op2, AThreadState State) + public static long BinarySignedSatQSub(long Op1, long Op2, AThreadState State) { - long Sub = op1 - op2; + long Sub = Op1 - Op2; - if (((op1 ^ op2) & (op1 ^ Sub)) < 0L) + if (((Op1 ^ Op2) & (Op1 ^ Sub)) < 0L) { State.SetFpsrFlag(FPSR.QC); - if (op1 < 0L) + if (Op1 < 0L) { return long.MinValue; } @@ -265,11 +265,11 @@ namespace ChocolArm64.Instruction } } - public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2, AThreadState State) + public static ulong BinaryUnsignedSatQSub(ulong Op1, ulong Op2, AThreadState State) { - ulong Sub = op1 - op2; + ulong Sub = Op1 - Op2; - if (op1 < op2) + if (Op1 < Op2) { State.SetFpsrFlag(FPSR.QC); @@ -281,16 +281,16 @@ namespace ChocolArm64.Instruction } } - public static long BinarySignedSatQAcc(ulong op1, long op2, AThreadState State) + public static long BinarySignedSatQAcc(ulong Op1, long Op2, AThreadState State) { - if (op1 <= (ulong)long.MaxValue) + if (Op1 <= (ulong)long.MaxValue) { - // op1 from ulong.MinValue to (ulong)long.MaxValue - // op2 from long.MinValue to long.MaxValue + // Op1 from ulong.MinValue to (ulong)long.MaxValue + // Op2 from long.MinValue to long.MaxValue - long Add = (long)op1 + op2; + long Add = (long)Op1 + Op2; - if ((~op2 & Add) < 0L) + if ((~Op2 & Add) < 0L) { State.SetFpsrFlag(FPSR.QC); @@ -301,10 +301,10 @@ namespace ChocolArm64.Instruction return Add; } } - else if (op2 >= 0L) + else if (Op2 >= 0L) { - // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue - // op2 from (long)ulong.MinValue to long.MaxValue + // Op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + // Op2 from (long)ulong.MinValue to long.MaxValue State.SetFpsrFlag(FPSR.QC); @@ -312,10 +312,10 @@ namespace ChocolArm64.Instruction } else { - // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue - // op2 from long.MinValue to (long)ulong.MinValue - 1L + // Op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + // Op2 from long.MinValue to (long)ulong.MinValue - 1L - ulong Add = op1 + (ulong)op2; + ulong Add = Op1 + (ulong)Op2; if (Add > (ulong)long.MaxValue) { @@ -330,16 +330,16 @@ namespace ChocolArm64.Instruction } } - public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2, AThreadState State) + public static ulong BinaryUnsignedSatQAcc(long Op1, ulong Op2, AThreadState State) { - if (op1 >= 0L) + if (Op1 >= 0L) { - // op1 from (long)ulong.MinValue to long.MaxValue - // op2 from ulong.MinValue to ulong.MaxValue + // Op1 from (long)ulong.MinValue to long.MaxValue + // Op2 from ulong.MinValue to ulong.MaxValue - ulong Add = (ulong)op1 + op2; + ulong Add = (ulong)Op1 + Op2; - if ((Add < (ulong)op1) && (Add < op2)) + if ((Add < (ulong)Op1) && (Add < Op2)) { State.SetFpsrFlag(FPSR.QC); @@ -350,19 +350,19 @@ namespace ChocolArm64.Instruction return Add; } } - else if (op2 > (ulong)long.MaxValue) + else if (Op2 > (ulong)long.MaxValue) { - // op1 from long.MinValue to (long)ulong.MinValue - 1L - // op2 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + // Op1 from long.MinValue to (long)ulong.MinValue - 1L + // Op2 from (ulong)long.MaxValue + 1UL to ulong.MaxValue - return (ulong)op1 + op2; + return (ulong)Op1 + Op2; } else { - // op1 from long.MinValue to (long)ulong.MinValue - 1L - // op2 from ulong.MinValue to (ulong)long.MaxValue + // Op1 from long.MinValue to (long)ulong.MinValue - 1L + // Op2 from ulong.MinValue to (ulong)long.MaxValue - long Add = op1 + (long)op2; + long Add = Op1 + (long)Op2; if (Add < (long)ulong.MinValue) { @@ -530,6 +530,150 @@ namespace ChocolArm64.Instruction } #endregion +#region "Sha1" + public static Vector128 HashChoose(Vector128 hash_abcd, uint hash_e, Vector128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = SHAchoose((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), + (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2), + (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2)); + + hash_e += Rol((uint)VectorExtractIntZx(hash_abcd, (byte)0, 2), 5) + t; + hash_e += (uint)VectorExtractIntZx(wk, (byte)e, 2); + + t = Rol((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), 30); + hash_abcd = VectorInsertInt((ulong)t, hash_abcd, (byte)1, 2); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static uint FixedRotate(uint hash_e) + { + return hash_e.Rol(30); + } + + public static Vector128 HashMajority(Vector128 hash_abcd, uint hash_e, Vector128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = SHAmajority((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), + (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2), + (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2)); + + hash_e += Rol((uint)VectorExtractIntZx(hash_abcd, (byte)0, 2), 5) + t; + hash_e += (uint)VectorExtractIntZx(wk, (byte)e, 2); + + t = Rol((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), 30); + hash_abcd = VectorInsertInt((ulong)t, hash_abcd, (byte)1, 2); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static Vector128 HashParity(Vector128 hash_abcd, uint hash_e, Vector128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = SHAparity((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), + (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2), + (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2)); + + hash_e += Rol((uint)VectorExtractIntZx(hash_abcd, (byte)0, 2), 5) + t; + hash_e += (uint)VectorExtractIntZx(wk, (byte)e, 2); + + t = Rol((uint)VectorExtractIntZx(hash_abcd, (byte)1, 2), 30); + hash_abcd = VectorInsertInt((ulong)t, hash_abcd, (byte)1, 2); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static Vector128 Sha1SchedulePart1(Vector128 w0_3, Vector128 w4_7, Vector128 w8_11) + { + if (!Sse.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + Vector128 result = new Vector128(); + + ulong t2 = VectorExtractIntZx(w4_7, (byte)0, 3); + ulong t1 = VectorExtractIntZx(w0_3, (byte)1, 3); + + result = VectorInsertInt((ulong)t1, result, (byte)0, 3); + result = VectorInsertInt((ulong)t2, result, (byte)1, 3); + + return Sse.Xor(result, Sse.Xor(w0_3, w8_11)); + } + + public static Vector128 Sha1SchedulePart2(Vector128 tw0_3, Vector128 w12_15) + { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + Vector128 result = new Vector128(); + + Vector128 T = Sse.Xor(tw0_3, Sse.StaticCast( + Sse2.ShiftRightLogical128BitLane(Sse.StaticCast(w12_15), (byte)4))); + + uint tE0 = (uint)VectorExtractIntZx(T, (byte)0, 2); + uint tE1 = (uint)VectorExtractIntZx(T, (byte)1, 2); + uint tE2 = (uint)VectorExtractIntZx(T, (byte)2, 2); + uint tE3 = (uint)VectorExtractIntZx(T, (byte)3, 2); + + result = VectorInsertInt((ulong)tE0.Rol(1), result, (byte)0, 2); + result = VectorInsertInt((ulong)tE1.Rol(1), result, (byte)1, 2); + result = VectorInsertInt((ulong)tE2.Rol(1), result, (byte)2, 2); + + return VectorInsertInt((ulong)(tE3.Rol(1) ^ tE0.Rol(2)), result, (byte)3, 2); + } + + private static void Rol32_160(ref uint y, ref Vector128 X) + { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + uint xE3 = (uint)VectorExtractIntZx(X, (byte)3, 2); + + X = Sse.StaticCast(Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast(X), (byte)4)); + X = VectorInsertInt((ulong)y, X, (byte)0, 2); + + y = xE3; + } + + private static uint SHAchoose(uint x, uint y, uint z) + { + return ((y ^ z) & x) ^ z; + } + + private static uint SHAmajority(uint x, uint y, uint z) + { + return (x & y) | ((x | y) & z); + } + + private static uint SHAparity(uint x, uint y, uint z) + { + return x ^ y ^ z; + } + + private static uint Rol(this uint value, int count) + { + return (value << count) | (value >> (32 - count)); + } +#endregion + #region "Sha256" [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 HashLower(Vector128 hash_abcd, Vector128 hash_efgh, Vector128 wk) @@ -543,7 +687,7 @@ namespace ChocolArm64.Instruction return SHA256hash(hash_abcd, hash_efgh, wk, false); } - public static Vector128 SchedulePart1(Vector128 w0_3, Vector128 w4_7) + public static Vector128 Sha256SchedulePart1(Vector128 w0_3, Vector128 w4_7) { Vector128 result = new Vector128(); @@ -561,7 +705,7 @@ namespace ChocolArm64.Instruction return result; } - public static Vector128 SchedulePart2(Vector128 w0_3, Vector128 w8_11, Vector128 w12_15) + public static Vector128 Sha256SchedulePart2(Vector128 w0_3, Vector128 w8_11, Vector128 w12_15) { Vector128 result = new Vector128(); @@ -650,16 +794,6 @@ namespace ChocolArm64.Instruction return x.Ror(6) ^ x.Ror(11) ^ x.Ror(25); } - private static uint SHAmajority(uint x, uint y, uint z) - { - return (x & y) | ((x | y) & z); - } - - private static uint SHAchoose(uint x, uint y, uint z) - { - return ((y ^ z) & x) ^ z; - } - private static uint Ror(this uint value, int count) { return (value >> count) | (value << (32 - count)); @@ -750,27 +884,35 @@ namespace ChocolArm64.Instruction #endregion #region "MultiplyHigh" - public static long SMulHi128(long LHS, long RHS) + public static long SMulHi128(long Left, long Right) { - long Result = (long)UMulHi128((ulong)LHS, (ulong)RHS); - if (LHS < 0) Result -= RHS; - if (RHS < 0) Result -= LHS; + long Result = (long)UMulHi128((ulong)Left, (ulong)Right); + + if (Left < 0) + { + Result -= Right; + } + + if (Right < 0) + { + Result -= Left; + } return Result; } - public static ulong UMulHi128(ulong LHS, ulong RHS) + public static ulong UMulHi128(ulong Left, ulong Right) { - //long multiplication - //multiply 32 bits at a time in 64 bit, the result is what's carried over 64 bits. - ulong LHigh = LHS >> 32; - ulong LLow = LHS & 0xFFFFFFFF; - ulong RHigh = RHS >> 32; - ulong RLow = RHS & 0xFFFFFFFF; - ulong Z2 = LLow * RLow; - ulong T = LHigh * RLow + (Z2 >> 32); + ulong LHigh = Left >> 32; + ulong LLow = Left & 0xFFFFFFFF; + ulong RHigh = Right >> 32; + ulong RLow = Right & 0xFFFFFFFF; + + ulong Z2 = LLow * RLow; + ulong T = LHigh * RLow + (Z2 >> 32); ulong Z1 = T & 0xFFFFFFFF; ulong Z0 = T >> 32; + Z1 += LLow * RHigh; return LHigh * RHigh + Z0 + (Z1 >> 32); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index 795d649abb..3bb24f3a03 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -371,6 +371,23 @@ namespace Ryujinx.Tests.Cpu 0x6EE1F800u // FSQRT V0.2D, V0.2D }; } + + private static uint[] _Sha1h_Sha1su1_V_() + { + return new uint[] + { + 0x5E280800u, // SHA1H S0, S0 + 0x5E281800u // SHA1SU1 V0.4S, V0.4S + }; + } + + private static uint[] _Sha256su0_V_() + { + return new uint[] + { + 0x5E282800u // SHA256SU0 V0.4S, V0.4S + }; + } #endregion private const int RndCnt = 2; @@ -1435,19 +1452,36 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise, Description("SHA256SU0 .4S, .4S")] - public void Sha256su0_V([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1, - [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1) + [Test, Pairwise] + public void Sha1h_Sha1su1_V([ValueSource("_Sha1h_Sha1su1_V_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1, + [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1) { - uint Opcode = 0x5E282800; // SHA256SU0 V0.4S, V0.4S - Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Vector128 V0 = MakeVectorE0E1(Z0, Z1); Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void Sha256su0_V([ValueSource("_Sha256su0_V_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1, + [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1) + { + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + + Vector128 V0 = MakeVectorE0E1(Z0, Z1); + Vector128 V1 = MakeVectorE0E1(A0, A1); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); CompareAgainstUnicorn(); } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index 1ea017c807..e986d7f662 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -353,6 +353,27 @@ namespace Ryujinx.Tests.Cpu 0x4EE0FC00u // FRSQRTS V0.2D, V0.2D, V0.2D }; } + + private static uint[] _Sha1c_Sha1m_Sha1p_Sha1su0_V_() + { + return new uint[] + { + 0x5E000000u, // SHA1C Q0, S0, V0.4S + 0x5E002000u, // SHA1M Q0, S0, V0.4S + 0x5E001000u, // SHA1P Q0, S0, V0.4S + 0x5E003000u // SHA1SU0 V0.4S, V0.4S, V0.4S + }; + } + + private static uint[] _Sha256h_Sha256h2_Sha256su1_V_() + { + return new uint[] + { + 0x5E004000u, // SHA256H Q0, Q0, V0.4S + 0x5E005000u, // SHA256H2 Q0, Q0, V0.4S + 0x5E006000u // SHA256SU1 V0.4S, V0.4S, V0.4S + }; + } #endregion private const int RndCnt = 2; @@ -1847,62 +1868,42 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise, Description("SHA256H , , .4S")] - public void Sha256h_V([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [Values(2u, 0u)] uint Rm, - [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1, - [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1, - [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1) + [Test, Pairwise] + public void Sha1c_Sha1m_Sha1p_Sha1su0_V([ValueSource("_Sha1c_Sha1m_Sha1p_Sha1su0_V_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1, + [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1, + [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1) { - uint Opcode = 0x5E004000; // SHA256H Q0, Q0, V0.4S - Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Vector128 V0 = MakeVectorE0E1(Z0, Z1); Vector128 V1 = MakeVectorE0E1(A0, A1); Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2); CompareAgainstUnicorn(); } - [Test, Pairwise, Description("SHA256H2 , , .4S")] - public void Sha256h2_V([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [Values(2u, 0u)] uint Rm, - [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1, - [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1, - [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1) + [Test, Pairwise] + public void Sha256h_Sha256h2_Sha256su1_V([ValueSource("_Sha256h_Sha256h2_Sha256su1_V_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1, + [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1, + [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1) { - uint Opcode = 0x5E005000; // SHA256H2 Q0, Q0, V0.4S - Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Vector128 V0 = MakeVectorE0E1(Z0, Z1); Vector128 V1 = MakeVectorE0E1(A0, A1); Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("SHA256SU1 .4S, .4S, .4S")] - public void Sha256su1_V([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [Values(2u, 0u)] uint Rm, - [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1, - [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1, - [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1) - { - uint Opcode = 0x5E006000; // SHA256SU1 V0.4S, V0.4S, V0.4S - Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); - - Vector128 V0 = MakeVectorE0E1(Z0, Z1); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2); CompareAgainstUnicorn(); }