forked from Mirror/Ryujinx
Add Fast Paths for Crypto instructions (A32/A64) (#1026)
* Add Fast Paths for Crypto instructions (A32/A64) * Replace additional XOR with passing in const zero.
This commit is contained in:
parent
a40d8d4a17
commit
f695a215ad
7 changed files with 122 additions and 8 deletions
|
@ -74,6 +74,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex));
|
||||
Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2));
|
||||
Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3));
|
||||
Add(X86Instruction.Aesdec, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38de, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Aesdeclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38df, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Aesenc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dc, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Aesenclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dd, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Aesimc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38db, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None));
|
||||
Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex));
|
||||
|
|
|
@ -17,6 +17,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(Intrinsic.X86Addps, new IntrinsicInfo(X86Instruction.Addps, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Addsd, new IntrinsicInfo(X86Instruction.Addsd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Addss, new IntrinsicInfo(X86Instruction.Addss, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Aesdec, new IntrinsicInfo(X86Instruction.Aesdec, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Aesdeclast, new IntrinsicInfo(X86Instruction.Aesdeclast, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Aesenc, new IntrinsicInfo(X86Instruction.Aesenc, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Aesenclast, new IntrinsicInfo(X86Instruction.Aesenclast, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Aesimc, new IntrinsicInfo(X86Instruction.Aesimc, IntrinsicType.Unary));
|
||||
Add(Intrinsic.X86Andnpd, new IntrinsicInfo(X86Instruction.Andnpd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Andnps, new IntrinsicInfo(X86Instruction.Andnps, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Andpd, new IntrinsicInfo(X86Instruction.Andpd, IntrinsicType.Binary));
|
||||
|
|
|
@ -7,6 +7,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Addps,
|
||||
Addsd,
|
||||
Addss,
|
||||
Aesdec,
|
||||
Aesdeclast,
|
||||
Aesenc,
|
||||
Aesenclast,
|
||||
Aesimc,
|
||||
And,
|
||||
Andnpd,
|
||||
Andnps,
|
||||
|
|
|
@ -15,7 +15,17 @@ namespace ARMeilleure.Instructions
|
|||
Operand d = GetVec(op.Rd);
|
||||
Operand n = GetVec(op.Rn);
|
||||
|
||||
context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n));
|
||||
Operand res;
|
||||
if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n);
|
||||
}
|
||||
|
||||
context.Copy(d, res);
|
||||
}
|
||||
|
||||
public static void Aese_V(ArmEmitterContext context)
|
||||
|
@ -25,7 +35,17 @@ namespace ARMeilleure.Instructions
|
|||
Operand d = GetVec(op.Rd);
|
||||
Operand n = GetVec(op.Rn);
|
||||
|
||||
context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n));
|
||||
Operand res;
|
||||
if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n);
|
||||
}
|
||||
|
||||
context.Copy(d, res);
|
||||
}
|
||||
|
||||
public static void Aesimc_V(ArmEmitterContext context)
|
||||
|
@ -34,7 +54,17 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand n = GetVec(op.Rn);
|
||||
|
||||
context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n));
|
||||
Operand res;
|
||||
if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
}
|
||||
|
||||
public static void Aesmc_V(ArmEmitterContext context)
|
||||
|
@ -43,7 +73,23 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand n = GetVec(op.Rn);
|
||||
|
||||
context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.MixColumns), n));
|
||||
Operand res;
|
||||
if (Optimizations.UseAesni)
|
||||
{
|
||||
Operand roundKey = context.VectorZero();
|
||||
|
||||
// Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
|
||||
|
||||
// Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.Call(new _V128_V128(SoftFallback.MixColumns), n);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,7 +15,17 @@ namespace ARMeilleure.Instructions
|
|||
Operand d = GetVecA32(op.Qd);
|
||||
Operand n = GetVecA32(op.Qm);
|
||||
|
||||
context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n));
|
||||
Operand res;
|
||||
if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n);
|
||||
}
|
||||
|
||||
context.Copy(d, res);
|
||||
}
|
||||
|
||||
public static void Aese_V(ArmEmitterContext context)
|
||||
|
@ -25,7 +35,17 @@ namespace ARMeilleure.Instructions
|
|||
Operand d = GetVecA32(op.Qd);
|
||||
Operand n = GetVecA32(op.Qm);
|
||||
|
||||
context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n));
|
||||
Operand res;
|
||||
if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n);
|
||||
}
|
||||
|
||||
context.Copy(d, res);
|
||||
}
|
||||
|
||||
public static void Aesimc_V(ArmEmitterContext context)
|
||||
|
@ -34,7 +54,17 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand n = GetVecA32(op.Qm);
|
||||
|
||||
context.Copy(GetVec(op.Qd), context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n));
|
||||
Operand res;
|
||||
if (Optimizations.UseAesni)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void Aesmc_V(ArmEmitterContext context)
|
||||
|
@ -43,7 +73,23 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
Operand n = GetVecA32(op.Qm);
|
||||
|
||||
context.Copy(GetVec(op.Qd), context.Call(new _V128_V128(SoftFallback.MixColumns), n));
|
||||
Operand res;
|
||||
if (Optimizations.UseAesni)
|
||||
{
|
||||
Operand roundKey = context.VectorZero();
|
||||
|
||||
// Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens.
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
|
||||
|
||||
// Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens.
|
||||
res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.Call(new _V128_V128(SoftFallback.MixColumns), n);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,6 +6,11 @@ namespace ARMeilleure.IntermediateRepresentation
|
|||
X86Addps,
|
||||
X86Addsd,
|
||||
X86Addss,
|
||||
X86Aesdec,
|
||||
X86Aesdeclast,
|
||||
X86Aesenc,
|
||||
X86Aesenclast,
|
||||
X86Aesimc,
|
||||
X86Andnpd,
|
||||
X86Andnps,
|
||||
X86Andpd,
|
||||
|
|
|
@ -16,6 +16,7 @@ namespace ARMeilleure
|
|||
public static bool UseSse42IfAvailable { get; set; } = true;
|
||||
public static bool UsePopCntIfAvailable { get; set; } = true;
|
||||
public static bool UseAvxIfAvailable { get; set; } = true;
|
||||
public static bool UseAesniIfAvailable { get; set; } = true;
|
||||
|
||||
public static bool ForceLegacySse
|
||||
{
|
||||
|
@ -31,5 +32,6 @@ namespace ARMeilleure
|
|||
internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42;
|
||||
internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
|
||||
internal static bool UseAvx => UseAvxIfAvailable && HardwareCapabilities.SupportsAvx && !ForceLegacySse;
|
||||
internal static bool UseAesni => UseAesniIfAvailable && HardwareCapabilities.SupportsAesni;
|
||||
}
|
||||
}
|
Reference in a new issue