forked from Mirror/Ryujinx
Implement fast path for AES crypto instructions on Arm64 (#5281)
* Implement fast path for AES crypto instructions on Arm64
* PPTC version bump
* Use AES HW feature check
This commit is contained in:
parent
eb0bb36bbf
commit
193ca3c9a2
9 changed files with 79 additions and 16 deletions
|
@ -168,8 +168,6 @@ namespace ARMeilleure.CodeGen.Arm64
|
|||
|
||||
Logger.StartPass(PassName.CodeGeneration);
|
||||
|
||||
//Console.Error.WriteLine(IRDumper.GetDump(cfg));
|
||||
|
||||
bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
|
||||
|
||||
CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);
|
||||
|
|
|
@ -179,6 +179,35 @@ namespace ARMeilleure.CodeGen.Arm64
|
|||
(uint)operation.GetSource(2).AsInt32());
|
||||
break;
|
||||
|
||||
case IntrinsicType.Vector128Unary:
|
||||
GenerateVectorUnary(
|
||||
context,
|
||||
1,
|
||||
0,
|
||||
info.Inst,
|
||||
operation.Destination,
|
||||
operation.GetSource(0));
|
||||
break;
|
||||
case IntrinsicType.Vector128Binary:
|
||||
GenerateVectorBinary(
|
||||
context,
|
||||
1,
|
||||
0,
|
||||
info.Inst,
|
||||
operation.Destination,
|
||||
operation.GetSource(0),
|
||||
operation.GetSource(1));
|
||||
break;
|
||||
case IntrinsicType.Vector128BinaryRd:
|
||||
GenerateVectorUnary(
|
||||
context,
|
||||
1,
|
||||
0,
|
||||
info.Inst,
|
||||
operation.Destination,
|
||||
operation.GetSource(1));
|
||||
break;
|
||||
|
||||
case IntrinsicType.VectorUnary:
|
||||
GenerateVectorUnary(
|
||||
context,
|
||||
|
|
|
@ -19,8 +19,8 @@ namespace ARMeilleure.CodeGen.Arm64
|
|||
Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
|
||||
Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
|
||||
Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
|
||||
Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary));
|
||||
Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary));
|
||||
Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128BinaryRd));
|
||||
Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128BinaryRd));
|
||||
Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
|
||||
Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
|
||||
Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));
|
||||
|
|
|
@ -23,6 +23,10 @@ namespace ARMeilleure.CodeGen.Arm64
|
|||
ScalarTernaryShlRd,
|
||||
ScalarTernaryShrRd,
|
||||
|
||||
Vector128Unary,
|
||||
Vector128Binary,
|
||||
Vector128BinaryRd,
|
||||
|
||||
VectorUnary,
|
||||
VectorUnaryBitwise,
|
||||
VectorUnaryByElem,
|
||||
|
@ -50,9 +54,6 @@ namespace ARMeilleure.CodeGen.Arm64
|
|||
VectorTernaryShlRd,
|
||||
VectorTernaryShrRd,
|
||||
|
||||
Vector128Unary,
|
||||
Vector128Binary,
|
||||
|
||||
GetRegister,
|
||||
SetRegister
|
||||
}
|
||||
|
|
|
@ -746,6 +746,7 @@ namespace ARMeilleure.CodeGen.Arm64
|
|||
info.Type == IntrinsicType.ScalarTernaryFPRdByElem ||
|
||||
info.Type == IntrinsicType.ScalarTernaryShlRd ||
|
||||
info.Type == IntrinsicType.ScalarTernaryShrRd ||
|
||||
info.Type == IntrinsicType.Vector128BinaryRd ||
|
||||
info.Type == IntrinsicType.VectorBinaryRd ||
|
||||
info.Type == IntrinsicType.VectorInsertByElem ||
|
||||
info.Type == IntrinsicType.VectorTernaryRd ||
|
||||
|
|
|
@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand res;
|
||||
|
||||
if (Optimizations.UseAesni)
|
||||
if (Optimizations.UseArm64Aes)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n);
|
||||
}
|
||||
else if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||
}
|
||||
|
@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand res;
|
||||
|
||||
if (Optimizations.UseAesni)
|
||||
if (Optimizations.UseArm64Aes)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n);
|
||||
}
|
||||
else if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||
}
|
||||
|
@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand res;
|
||||
|
||||
if (Optimizations.UseAesni)
|
||||
if (Optimizations.UseArm64Aes)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n);
|
||||
}
|
||||
else if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
|
||||
}
|
||||
|
@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand res;
|
||||
|
||||
if (Optimizations.UseAesni)
|
||||
if (Optimizations.UseArm64Aes)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n);
|
||||
}
|
||||
else if (Optimizations.UseAesni)
|
||||
{
|
||||
Operand roundKey = context.VectorZero();
|
||||
|
||||
|
|
|
@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand res;
|
||||
|
||||
if (Optimizations.UseAesni)
|
||||
if (Optimizations.UseArm64Aes)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n);
|
||||
}
|
||||
else if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||
}
|
||||
|
@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand res;
|
||||
|
||||
if (Optimizations.UseAesni)
|
||||
if (Optimizations.UseArm64Aes)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n);
|
||||
}
|
||||
else if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||
}
|
||||
|
@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand res;
|
||||
|
||||
if (Optimizations.UseAesni)
|
||||
if (Optimizations.UseArm64Aes)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n);
|
||||
}
|
||||
else if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
|
||||
}
|
||||
|
@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand res;
|
||||
|
||||
if (Optimizations.UseAesni)
|
||||
if (Optimizations.UseArm64Aes)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n);
|
||||
}
|
||||
else if (Optimizations.UseAesni)
|
||||
{
|
||||
Operand roundKey = context.VectorZero();
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ namespace ARMeilleure
|
|||
public static bool UseUnmanagedDispatchLoop { get; set; } = true;
|
||||
|
||||
public static bool UseAdvSimdIfAvailable { get; set; } = true;
|
||||
public static bool UseArm64AesIfAvailable { get; set; } = true;
|
||||
public static bool UseArm64PmullIfAvailable { get; set; } = true;
|
||||
|
||||
public static bool UseSseIfAvailable { get; set; } = true;
|
||||
|
@ -41,6 +42,7 @@ namespace ARMeilleure
|
|||
}
|
||||
|
||||
internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd;
|
||||
internal static bool UseArm64Aes => UseArm64AesIfAvailable && Arm64HardwareCapabilities.SupportsAes;
|
||||
internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull;
|
||||
|
||||
internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse;
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC
|
|||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||
|
||||
private const uint InternalVersion = 4661; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
private const uint InternalVersion = 5281; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
|
||||
private const string ActualDir = "0";
|
||||
private const string BackupDir = "1";
|
||||
|
|
Loading…
Reference in a new issue