// NOTE(review): the text below is a unified git diff of C# sources whose line breaks were
// collapsed; angle-bracket spans (generic type arguments, and a larger region of the first
// hunks) appear to be missing -- confirm against the original commit. It is reproduced
// unchanged; only this leading commentary has been added.
//
// Visible contents, in order of appearance:
//
// 1. ChocolArm64/AOpCodeTable.cs
//    Adds four SetA64 registrations that map fixed 32-bit patterns (low 10 bits wildcarded)
//    to AInstEmit.Aesd_V, Aese_V, Aesimc_V and Aesmc_V, all decoded as AOpCodeSimd.
//
// 2. Crypto helper methods (called as ACryptoHelper.* later in this patch; the class header
//    and the lookup tables GFMul_02/03/09/0B/0D/0E, SBox, InvSBox, SRPerm and ISRPerm are not
//    visible here). Each public method copies the vector into a 16-byte array, fills a second
//    16-byte array, writes that back into `op` and returns it:
//      - AESInvMixColumns: for each of 4 columns (Idx = Columns << 2) every output byte is the
//        XOR of GFMul_0E/0B/0D/09 lookups of the column's four bytes, with the table order
//        rotated per output row.
//      - AESInvShiftRows:  OutState[ISRPerm[Idx]] = InState[Idx] for Idx 0..15.
//      - AESInvSubBytes:   OutState[Idx] = InvSBox[InState[Idx]] for Idx 0..15.
//      - AESMixColumns:    same column walk as AESInvMixColumns, combining GFMul_02 and
//        GFMul_03 lookups with the two remaining bytes taken unmodified.
//      - AESShiftRows:     OutState[SRPerm[Idx]] = InState[Idx] for Idx 0..15.
//      - AESSubBytes:      OutState[Idx] = SBox[InState[Idx]] for Idx 0..15.
//      - FromVectorToByteArray: reads two 64-bit values with AVectorHelper.VectorExtractIntZx
//        (indices 0 and 1, size argument 3) and peels them off one byte at a time, lowest byte
//        first, into State[0..7] and State[8..15].
//      - FromByteArrayToVector: throws PlatformNotSupportedException when Sse2.IsSupported is
//        false; otherwise rebuilds the vector with Sse2.SetVector128(State[15], ..., State[0])
//        wrapped in Sse.StaticCast.
//
// 3. ChocolArm64/Instruction/AInstEmitSimdCrypto.cs (new file)
//      - Aesd_V / Aese_V: load vector Rd, then vector Rn, emit a call to ASoftFallback.Decrypt /
//        ASoftFallback.Encrypt, and store the result to Rd.
//      - Aesimc_V / Aesmc_V: load vector Rn, emit a call to ASoftFallback.InverseMixColumns /
//        ASoftFallback.MixColumns, and store the result to Rd.
//
// 4. ChocolArm64/Instruction/ASoftFallback.cs ("Aes" region)
//      - Decrypt(value, roundKey): AESInvSubBytes(AESInvShiftRows(Sse.Xor(value, roundKey))).
//      - Encrypt(value, roundKey): AESSubBytes(AESShiftRows(Sse.Xor(value, roundKey))).
//        Both throw PlatformNotSupportedException when Sse.IsSupported is false.
//      - InverseMixColumns / MixColumns: forward directly to ACryptoHelper.AESInvMixColumns /
//        ACryptoHelper.AESMixColumns.
//
// 5. Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs (new file)
//    Four [Test, Explicit] cases, one per instruction; the file's own header comment links the
//    Intel AES-NI white paper (presumably the source of the fixed vectors -- confirm).
//      - Aesd_V / Aese_V: V0 is seeded with RoundKey ^ Value and V1 with RoundKey, so the XOR
//        performed by the instruction cancels the random round key; the test asserts V0 equals
//        the fixed Result and V1 still holds the round key.
//      - Aesimc_V / Aesmc_V: run with Rn = 1 and Rn = 0; the input is placed in V1 or V0
//        accordingly, V0 is asserted to equal Result, and when Rn == 1 V1 is asserted unchanged.
diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index 97404bbccc..dc8cfc0879 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -180,6 +180,10 @@ namespace ChocolArm64 SetA64("0>001110<<1xxxxx101111xxxxxxxxxx", AInstEmit.Addp_V, typeof(AOpCodeSimdReg)); SetA64("000011100x110001101110xxxxxxxxxx", AInstEmit.Addv_V, typeof(AOpCodeSimd)); SetA64("01001110<<110001101110xxxxxxxxxx", AInstEmit.Addv_V, typeof(AOpCodeSimd)); + SetA64("0100111000101000010110xxxxxxxxxx", AInstEmit.Aesd_V, typeof(AOpCodeSimd)); + SetA64("0100111000101000010010xxxxxxxxxx", AInstEmit.Aese_V, typeof(AOpCodeSimd)); + SetA64("0100111000101000011110xxxxxxxxxx", AInstEmit.Aesimc_V, typeof(AOpCodeSimd)); + SetA64("0100111000101000011010xxxxxxxxxx", AInstEmit.Aesmc_V, typeof(AOpCodeSimd)); SetA64("0x001110001xxxxx000111xxxxxxxxxx", AInstEmit.And_V, typeof(AOpCodeSimdReg)); SetA64("0x001110011xxxxx000111xxxxxxxxxx", AInstEmit.Bic_V, typeof(AOpCodeSimdReg)); SetA64("0x10111100000xxx< AESInvMixColumns(Vector128 op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + FromVectorToByteArray(InState, ref op); + + for (int Columns = 0; Columns <= 3; Columns++) + { + int Idx = Columns << 2; + + byte Row0 = InState[Idx + 0]; // A, E, I, M: [Row0, Col0-Col3] + byte Row1 = InState[Idx + 1]; // B, F, J, N: [Row1, Col0-Col3] + byte Row2 = InState[Idx + 2]; // C, G, K, O: [Row2, Col0-Col3] + byte Row3 = InState[Idx + 3]; // D, H, L, P: [Row3, Col0-Col3] + + OutState[Idx + 0] = (byte)((uint)GFMul_0E[Row0] ^ GFMul_0B[Row1] ^ GFMul_0D[Row2] ^ GFMul_09[Row3]); + OutState[Idx + 1] = (byte)((uint)GFMul_09[Row0] ^ GFMul_0E[Row1] ^ GFMul_0B[Row2] ^ GFMul_0D[Row3]); + OutState[Idx + 2] = (byte)((uint)GFMul_0D[Row0] ^ GFMul_09[Row1] ^ GFMul_0E[Row2] ^ GFMul_0B[Row3]); + OutState[Idx + 3] = (byte)((uint)GFMul_0B[Row0] ^ GFMul_0D[Row1] ^ GFMul_09[Row2] ^ GFMul_0E[Row3]); + } + + FromByteArrayToVector(OutState, ref op); + + return op; + } + + public static 
Vector128 AESInvShiftRows(Vector128 op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + FromVectorToByteArray(InState, ref op); + + for (int Idx = 0; Idx <= 15; Idx++) + { + OutState[ISRPerm[Idx]] = InState[Idx]; + } + + FromByteArrayToVector(OutState, ref op); + + return op; + } + + public static Vector128 AESInvSubBytes(Vector128 op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + FromVectorToByteArray(InState, ref op); + + for (int Idx = 0; Idx <= 15; Idx++) + { + OutState[Idx] = InvSBox[InState[Idx]]; + } + + FromByteArrayToVector(OutState, ref op); + + return op; + } + + public static Vector128 AESMixColumns(Vector128 op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + FromVectorToByteArray(InState, ref op); + + for (int Columns = 0; Columns <= 3; Columns++) + { + int Idx = Columns << 2; + + byte Row0 = InState[Idx + 0]; // A, E, I, M: [Row0, Col0-Col3] + byte Row1 = InState[Idx + 1]; // B, F, J, N: [Row1, Col0-Col3] + byte Row2 = InState[Idx + 2]; // C, G, K, O: [Row2, Col0-Col3] + byte Row3 = InState[Idx + 3]; // D, H, L, P: [Row3, Col0-Col3] + + OutState[Idx + 0] = (byte)((uint)GFMul_02[Row0] ^ GFMul_03[Row1] ^ Row2 ^ Row3); + OutState[Idx + 1] = (byte)((uint)Row0 ^ GFMul_02[Row1] ^ GFMul_03[Row2] ^ Row3); + OutState[Idx + 2] = (byte)((uint)Row0 ^ Row1 ^ GFMul_02[Row2] ^ GFMul_03[Row3]); + OutState[Idx + 3] = (byte)((uint)GFMul_03[Row0] ^ Row1 ^ Row2 ^ GFMul_02[Row3]); + } + + FromByteArrayToVector(OutState, ref op); + + return op; + } + + public static Vector128 AESShiftRows(Vector128 op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + FromVectorToByteArray(InState, ref op); + + for (int Idx = 0; Idx <= 15; Idx++) + { + OutState[SRPerm[Idx]] = InState[Idx]; + } + + FromByteArrayToVector(OutState, ref op); + + return op; + } + + public static Vector128 AESSubBytes(Vector128 op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + 
FromVectorToByteArray(InState, ref op); + + for (int Idx = 0; Idx <= 15; Idx++) + { + OutState[Idx] = SBox[InState[Idx]]; + } + + FromByteArrayToVector(OutState, ref op); + + return op; + } + + private static void FromVectorToByteArray(byte[] State, ref Vector128 op) + { + ulong ULongLow = AVectorHelper.VectorExtractIntZx((op), (byte)0, 3); + ulong ULongHigh = AVectorHelper.VectorExtractIntZx((op), (byte)1, 3); + + for (int Idx = 0; Idx <= 7; Idx++) + { + State[Idx + 0] = (byte)(ULongLow & 0xFFUL); + State[Idx + 8] = (byte)(ULongHigh & 0xFFUL); + + ULongLow >>= 8; + ULongHigh >>= 8; + } + } + + private static void FromByteArrayToVector(byte[] State, ref Vector128 op) + { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + op = Sse.StaticCast(Sse2.SetVector128( + State[15], State[14], State[13], State[12], + State[11], State[10], State[9], State[8], + State[7], State[6], State[5], State[4], + State[3], State[2], State[1], State[0])); + } + } +} diff --git a/ChocolArm64/Instruction/AInstEmitSimdCrypto.cs b/ChocolArm64/Instruction/AInstEmitSimdCrypto.cs new file mode 100644 index 0000000000..b2680a588a --- /dev/null +++ b/ChocolArm64/Instruction/AInstEmitSimdCrypto.cs @@ -0,0 +1,54 @@ +using ChocolArm64.Decoder; +using ChocolArm64.Translation; + +namespace ChocolArm64.Instruction +{ + static partial class AInstEmit + { + public static void Aesd_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + Context.EmitLdvec(Op.Rd); + Context.EmitLdvec(Op.Rn); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Decrypt)); + + Context.EmitStvec(Op.Rd); + } + + public static void Aese_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + Context.EmitLdvec(Op.Rd); + Context.EmitLdvec(Op.Rn); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Encrypt)); + + Context.EmitStvec(Op.Rd); + } + + public static void Aesimc_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = 
(AOpCodeSimd)Context.CurrOp; + + Context.EmitLdvec(Op.Rn); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.InverseMixColumns)); + + Context.EmitStvec(Op.Rd); + } + + public static void Aesmc_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + Context.EmitLdvec(Op.Rn); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MixColumns)); + + Context.EmitStvec(Op.Rd); + } + } +} diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs index 0c8a39a4aa..0ae84ab2d4 100644 --- a/ChocolArm64/Instruction/ASoftFallback.cs +++ b/ChocolArm64/Instruction/ASoftFallback.cs @@ -410,6 +410,42 @@ namespace ChocolArm64.Instruction } #endregion +#region "Aes" + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 Decrypt(Vector128 value, Vector128 roundKey) + { + if (!Sse.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + return ACryptoHelper.AESInvSubBytes(ACryptoHelper.AESInvShiftRows(Sse.Xor(value, roundKey))); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 Encrypt(Vector128 value, Vector128 roundKey) + { + if (!Sse.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + return ACryptoHelper.AESSubBytes(ACryptoHelper.AESShiftRows(Sse.Xor(value, roundKey))); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 InverseMixColumns(Vector128 value) + { + return ACryptoHelper.AESInvMixColumns(value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 MixColumns(Vector128 value) + { + return ACryptoHelper.AESMixColumns(value); + } +#endregion + #region "Sha256" [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 HashLower(Vector128 hash_abcd, Vector128 hash_efgh, Vector128 wk) diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs new file mode 100644 index 0000000000..e46937339b --- 
/dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs @@ -0,0 +1,135 @@ +// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf + +using ChocolArm64.State; + +using NUnit.Framework; + +using System.Runtime.Intrinsics; + +namespace Ryujinx.Tests.Cpu +{ + public class CpuTestSimdCrypto : CpuTest + { + [Test, Explicit, Description("AESD .16B, .16B")] + public void Aesd_V([Values(0u)] uint Rd, + [Values(1u)] uint Rn, + [Values(0x7B5B546573745665ul)] ulong ValueH, + [Values(0x63746F725D53475Dul)] ulong ValueL, + [Random(2)] ulong RoundKeyH, + [Random(2)] ulong RoundKeyL, + [Values(0x8DCAB9BC035006BCul)] ulong ResultH, + [Values(0x8F57161E00CAFD8Dul)] ulong ResultL) + { + uint Opcode = 0x4E285800; // AESD V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Vector128 V0 = MakeVectorE0E1(RoundKeyL ^ ValueL, RoundKeyH ^ ValueH); + Vector128 V1 = MakeVectorE0E1(RoundKeyL, RoundKeyH); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL)); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH)); + }); + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(RoundKeyL)); + Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(RoundKeyH)); + }); + } + + [Test, Explicit, Description("AESE .16B, .16B")] + public void Aese_V([Values(0u)] uint Rd, + [Values(1u)] uint Rn, + [Values(0x7B5B546573745665ul)] ulong ValueH, + [Values(0x63746F725D53475Dul)] ulong ValueL, + [Random(2)] ulong RoundKeyH, + [Random(2)] ulong RoundKeyL, + [Values(0x8F92A04DFBED204Dul)] ulong ResultH, + [Values(0x4C39B1402192A84Cul)] ulong ResultL) + { + uint Opcode = 0x4E284800; // AESE V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Vector128 V0 = MakeVectorE0E1(RoundKeyL ^ ValueL, RoundKeyH ^ ValueH); + Vector128 V1 = MakeVectorE0E1(RoundKeyL, RoundKeyH); + + AThreadState ThreadState 
= SingleOpcode(Opcode, V0: V0, V1: V1); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL)); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH)); + }); + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(RoundKeyL)); + Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(RoundKeyH)); + }); + } + + [Test, Explicit, Description("AESIMC .16B, .16B")] + public void Aesimc_V([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(0x8DCAB9DC035006BCul)] ulong ValueH, + [Values(0x8F57161E00CAFD8Dul)] ulong ValueL, + [Values(0xD635A667928B5EAEul)] ulong ResultH, + [Values(0xEEC9CC3BC55F5777ul)] ulong ResultL) + { + uint Opcode = 0x4E287800; // AESIMC V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Vector128 V = MakeVectorE0E1(ValueL, ValueH); + + AThreadState ThreadState = SingleOpcode( + Opcode, + V0: Rn == 0u ? V : default(Vector128), + V1: Rn == 1u ? V : default(Vector128)); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL)); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH)); + }); + if (Rn == 1u) + { + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(ValueL)); + Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(ValueH)); + }); + } + } + + [Test, Explicit, Description("AESMC .16B, .16B")] + public void Aesmc_V([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(0x627A6F6644B109C8ul)] ulong ValueH, + [Values(0x2B18330A81C3B3E5ul)] ulong ValueL, + [Values(0x7B5B546573745665ul)] ulong ResultH, + [Values(0x63746F725D53475Dul)] ulong ResultL) + { + uint Opcode = 0x4E286800; // AESMC V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Vector128 V = MakeVectorE0E1(ValueL, ValueH); + + AThreadState ThreadState = SingleOpcode( + Opcode, + V0: Rn == 0u ? V : default(Vector128), + V1: Rn == 1u ? 
V : default(Vector128)); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL)); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH)); + }); + if (Rn == 1u) + { + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(ValueL)); + Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(ValueH)); + }); + } + } + } +}