From dcce4070719a3798bb96d3aa02b9ba02a7fecc16 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Wed, 27 Jan 2021 20:23:00 -0300 Subject: [PATCH] Lower precision of estimate instruction results to match Arm behavior (#1943) * Lower precision of estimate instruction results to match Arm behavior * PTC version update * Nits --- .../Instructions/InstEmitSimdArithmetic.cs | 83 +++++++++++++++---- ARMeilleure/Translation/PTC/Ptc.cs | 2 +- 2 files changed, 66 insertions(+), 19 deletions(-) diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs index f18b91cfcc..deaa6f5acd 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -1475,9 +1475,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { - EmitScalarUnaryOpF(context, Intrinsic.X86Rcpss, 0); + Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); } else { @@ -1494,9 +1496,16 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { - EmitVectorUnaryOpF(context, Intrinsic.X86Rcpps, 0); + Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); } else { @@ -1652,7 +1661,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.UseSse41) { - EmitScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); + EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); } else { @@ -1667,7 +1676,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.UseSse41) { - EmitVectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); + EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); } else { @@ -1682,7 +1691,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.UseSse41) { - EmitScalarRoundOpF(context, FPRoundingMode.ToNearest); + EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest); } else { @@ -1697,7 +1706,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.UseSse41) { - EmitVectorRoundOpF(context, FPRoundingMode.ToNearest); + EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest); } else { @@ -1712,7 +1721,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.UseSse41) { - EmitScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); + EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); } else { @@ -1727,7 +1736,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.UseSse41) { - EmitVectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); + EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); } else { @@ -1778,7 +1787,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.UseSse41) { - EmitScalarRoundOpF(context, FPRoundingMode.TowardsZero); + EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero); } else { @@ -1793,7 +1802,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.UseSse41) { - EmitVectorRoundOpF(context, FPRoundingMode.TowardsZero); + EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero); } else { @@ -1810,9 +1819,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { - EmitScalarUnaryOpF(context, Intrinsic.X86Rsqrtss, 0); + Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); } else { @@ -1829,9 +1840,16 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { - EmitVectorUnaryOpF(context, Intrinsic.X86Rsqrtps, 0); + Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); } else { @@ -3498,7 +3516,7 @@ namespace ARMeilleure.Instructions return context.ConditionalSelect(cmp, op1, op2); } - private static void EmitScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) + private static void EmitSse41ScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -3520,7 +3538,7 @@ namespace ARMeilleure.Instructions context.Copy(GetVec(op.Rd), res); } - private static void EmitVectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) + private static void EmitSse41VectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -3538,6 +3556,35 @@ namespace ARMeilleure.Instructions context.Copy(GetVec(op.Rd), res); } + private static Operand EmitSse41FP32RoundExp8(ArmEmitterContext context, Operand value, bool scalar) + { + Operand roundMask; + Operand truncMask; + Operand expMask; + + if (scalar) + { + roundMask = X86GetScalar(context, 0x4000); + truncMask = X86GetScalar(context, unchecked((int)0xFFFF8000)); + expMask = X86GetScalar(context, 0x7F800000); + } + else + { + roundMask = X86GetAllElements(context, 0x4000); + truncMask = X86GetAllElements(context, unchecked((int)0xFFFF8000)); + expMask = X86GetAllElements(context, 0x7F800000); + } + + Operand oValue = value; + Operand masked = context.AddIntrinsic(Intrinsic.X86Pand, value, expMask); + Operand isNaNInf = context.AddIntrinsic(Intrinsic.X86Pcmpeqw, masked, expMask); + + value = context.AddIntrinsic(Intrinsic.X86Paddw, value, roundMask); + value = context.AddIntrinsic(Intrinsic.X86Pand, value, truncMask); + + return context.AddIntrinsic(Intrinsic.X86Blendvps, value, oValue, isNaNInf); + } + public static void EmitSse2VectorIsNaNOpF( ArmEmitterContext context, Operand opF, diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 75a801e5f8..fd69077b02 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -22,7 +22,7 @@ namespace ARMeilleure.Translation.PTC { private const string HeaderMagic = "PTChd"; - private const int InternalVersion = 1956; //! To be incremented manually for each change to the ARMeilleure project. + private const int InternalVersion = 1943; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1";