Add Fast Paths for Crypto instructions (A32/A64) (#1026)

* Add Fast Paths for Crypto instructions (A32/A64)

* Replace additional XOR with passing in const zero.
This commit is contained in:
riperiperi 2020-03-25 06:20:29 +00:00 committed by GitHub
parent a40d8d4a17
commit f695a215ad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 122 additions and 8 deletions

View file

@ -74,6 +74,11 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex)); Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex));
Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2)); Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2));
Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3)); Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3));
Add(X86Instruction.Aesdec, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38de, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Aesdeclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38df, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Aesenc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dc, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Aesenclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dd, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Aesimc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38db, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None)); Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None));
Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex)); Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex));

View file

@ -17,6 +17,11 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Addps, new IntrinsicInfo(X86Instruction.Addps, IntrinsicType.Binary)); Add(Intrinsic.X86Addps, new IntrinsicInfo(X86Instruction.Addps, IntrinsicType.Binary));
Add(Intrinsic.X86Addsd, new IntrinsicInfo(X86Instruction.Addsd, IntrinsicType.Binary)); Add(Intrinsic.X86Addsd, new IntrinsicInfo(X86Instruction.Addsd, IntrinsicType.Binary));
Add(Intrinsic.X86Addss, new IntrinsicInfo(X86Instruction.Addss, IntrinsicType.Binary)); Add(Intrinsic.X86Addss, new IntrinsicInfo(X86Instruction.Addss, IntrinsicType.Binary));
Add(Intrinsic.X86Aesdec, new IntrinsicInfo(X86Instruction.Aesdec, IntrinsicType.Binary));
Add(Intrinsic.X86Aesdeclast, new IntrinsicInfo(X86Instruction.Aesdeclast, IntrinsicType.Binary));
Add(Intrinsic.X86Aesenc, new IntrinsicInfo(X86Instruction.Aesenc, IntrinsicType.Binary));
Add(Intrinsic.X86Aesenclast, new IntrinsicInfo(X86Instruction.Aesenclast, IntrinsicType.Binary));
Add(Intrinsic.X86Aesimc, new IntrinsicInfo(X86Instruction.Aesimc, IntrinsicType.Unary));
Add(Intrinsic.X86Andnpd, new IntrinsicInfo(X86Instruction.Andnpd, IntrinsicType.Binary)); Add(Intrinsic.X86Andnpd, new IntrinsicInfo(X86Instruction.Andnpd, IntrinsicType.Binary));
Add(Intrinsic.X86Andnps, new IntrinsicInfo(X86Instruction.Andnps, IntrinsicType.Binary)); Add(Intrinsic.X86Andnps, new IntrinsicInfo(X86Instruction.Andnps, IntrinsicType.Binary));
Add(Intrinsic.X86Andpd, new IntrinsicInfo(X86Instruction.Andpd, IntrinsicType.Binary)); Add(Intrinsic.X86Andpd, new IntrinsicInfo(X86Instruction.Andpd, IntrinsicType.Binary));

View file

@ -7,6 +7,11 @@ namespace ARMeilleure.CodeGen.X86
Addps, Addps,
Addsd, Addsd,
Addss, Addss,
Aesdec,
Aesdeclast,
Aesenc,
Aesenclast,
Aesimc,
And, And,
Andnpd, Andnpd,
Andnps, Andnps,

View file

@ -15,7 +15,17 @@ namespace ARMeilleure.Instructions
Operand d = GetVec(op.Rd); Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn); Operand n = GetVec(op.Rn);
context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n)); Operand res;
if (Optimizations.UseAesni)
{
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
}
else
{
res = context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n);
}
context.Copy(d, res);
} }
public static void Aese_V(ArmEmitterContext context) public static void Aese_V(ArmEmitterContext context)
@ -25,7 +35,17 @@ namespace ARMeilleure.Instructions
Operand d = GetVec(op.Rd); Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn); Operand n = GetVec(op.Rn);
context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n)); Operand res;
if (Optimizations.UseAesni)
{
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
}
else
{
res = context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n);
}
context.Copy(d, res);
} }
public static void Aesimc_V(ArmEmitterContext context) public static void Aesimc_V(ArmEmitterContext context)
@ -34,7 +54,17 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn); Operand n = GetVec(op.Rn);
context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n)); Operand res;
if (Optimizations.UseAesni)
{
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
}
else
{
res = context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n);
}
context.Copy(GetVec(op.Rd), res);
} }
public static void Aesmc_V(ArmEmitterContext context) public static void Aesmc_V(ArmEmitterContext context)
@ -43,7 +73,23 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn); Operand n = GetVec(op.Rn);
context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.MixColumns), n)); Operand res;
if (Optimizations.UseAesni)
{
Operand roundKey = context.VectorZero();
// Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
// Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens
res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
}
else
{
res = context.Call(new _V128_V128(SoftFallback.MixColumns), n);
}
context.Copy(GetVec(op.Rd), res);
} }
} }
} }

View file

@ -15,7 +15,17 @@ namespace ARMeilleure.Instructions
Operand d = GetVecA32(op.Qd); Operand d = GetVecA32(op.Qd);
Operand n = GetVecA32(op.Qm); Operand n = GetVecA32(op.Qm);
context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n)); Operand res;
if (Optimizations.UseAesni)
{
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
}
else
{
res = context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n);
}
context.Copy(d, res);
} }
public static void Aese_V(ArmEmitterContext context) public static void Aese_V(ArmEmitterContext context)
@ -25,7 +35,17 @@ namespace ARMeilleure.Instructions
Operand d = GetVecA32(op.Qd); Operand d = GetVecA32(op.Qd);
Operand n = GetVecA32(op.Qm); Operand n = GetVecA32(op.Qm);
context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n)); Operand res;
if (Optimizations.UseAesni)
{
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
}
else
{
res = context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n);
}
context.Copy(d, res);
} }
public static void Aesimc_V(ArmEmitterContext context) public static void Aesimc_V(ArmEmitterContext context)
@ -34,7 +54,17 @@ namespace ARMeilleure.Instructions
Operand n = GetVecA32(op.Qm); Operand n = GetVecA32(op.Qm);
context.Copy(GetVec(op.Qd), context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n)); Operand res;
if (Optimizations.UseAesni)
{
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
}
else
{
res = context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n);
}
context.Copy(GetVecA32(op.Qd), res);
} }
public static void Aesmc_V(ArmEmitterContext context) public static void Aesmc_V(ArmEmitterContext context)
@ -43,7 +73,23 @@ namespace ARMeilleure.Instructions
Operand n = GetVecA32(op.Qm); Operand n = GetVecA32(op.Qm);
context.Copy(GetVec(op.Qd), context.Call(new _V128_V128(SoftFallback.MixColumns), n)); Operand res;
if (Optimizations.UseAesni)
{
Operand roundKey = context.VectorZero();
// Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens.
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
// Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens.
res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
}
else
{
res = context.Call(new _V128_V128(SoftFallback.MixColumns), n);
}
context.Copy(GetVecA32(op.Qd), res);
} }
} }
} }

View file

@ -6,6 +6,11 @@ namespace ARMeilleure.IntermediateRepresentation
X86Addps, X86Addps,
X86Addsd, X86Addsd,
X86Addss, X86Addss,
X86Aesdec,
X86Aesdeclast,
X86Aesenc,
X86Aesenclast,
X86Aesimc,
X86Andnpd, X86Andnpd,
X86Andnps, X86Andnps,
X86Andpd, X86Andpd,

View file

@ -16,6 +16,7 @@ namespace ARMeilleure
public static bool UseSse42IfAvailable { get; set; } = true; public static bool UseSse42IfAvailable { get; set; } = true;
public static bool UsePopCntIfAvailable { get; set; } = true; public static bool UsePopCntIfAvailable { get; set; } = true;
public static bool UseAvxIfAvailable { get; set; } = true; public static bool UseAvxIfAvailable { get; set; } = true;
public static bool UseAesniIfAvailable { get; set; } = true;
public static bool ForceLegacySse public static bool ForceLegacySse
{ {
@ -31,5 +32,6 @@ namespace ARMeilleure
internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42; internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42;
internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt; internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
internal static bool UseAvx => UseAvxIfAvailable && HardwareCapabilities.SupportsAvx && !ForceLegacySse; internal static bool UseAvx => UseAvxIfAvailable && HardwareCapabilities.SupportsAvx && !ForceLegacySse;
internal static bool UseAesni => UseAesniIfAvailable && HardwareCapabilities.SupportsAesni;
} }
} }