forked from Mirror/Ryujinx
9db73f74cf
* ARMeilleure: Respect Fz flag for all floating point operations. This is a change in strategy for emulating the Fz FPCR flag. Before, it was set before instructions that "needed it" and reset after. However, this missed a few hot instructions like the multiplication instruction, and the entirety of A32. The new strategy is to set the Fz flag only in the following circumstances: - Set to match FPCR before translated functions/loop are executed. - Reset when calling SoftFloat methods, set when returning. - Reset when exiting execution. This allows us to remove the code around the existing Fz aware instructions, and get the accuracy benefits on all floating point instructions executed while in translated code. Single step executions now need to be called with a context wrapper - right now it just contains the Fz flag initialization, and won't actually do anything on ARM. This fixes a bug in Breath of the Wild where some physics interactions could randomly crash the game due to subnormal values not flushing to zero. This is draft right now because I need to answer the questions: - Does dotnet avoid changing the value of Mxcsr? - Is it a good idea to assume that? Or should the flag set/restore be done on every managed method call, not just softfloat? - If we assume that, do we want a unit test to verify the behaviour? I recommend testing a bunch of games, especially games affected when this was originally added, such as #1611. * Remove unused method * Use FMA for Fmadd, Fmsub, Fnmadd, Fnmsub, Fmla, Fmls ...when available. Similar implementation to A32 * Use FMA for Frecps, Frsqrts * Don't set DAZ. * Add round mode to ARM FP mode * Fix mistakes * Add test for FP state when calling managed methods * Add explanatory comment to test. * Cleanup * Add A64 FPCR flags * Vrintx_S A32 fast path on A64 backend * Address feedback 1, re-enable DAZ * Fix FMA instructions By Elem * Address feedback
800 lines
31 KiB
C#
800 lines
31 KiB
C#
using ARMeilleure.Decoders;
|
|
using ARMeilleure.IntermediateRepresentation;
|
|
using ARMeilleure.State;
|
|
using ARMeilleure.Translation;
|
|
using System;
|
|
using System.Diagnostics;
|
|
using System.Reflection;
|
|
|
|
using static ARMeilleure.Instructions.InstEmitHelper;
|
|
using static ARMeilleure.Instructions.InstEmitSimdHelper;
|
|
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
|
|
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
|
|
|
|
namespace ARMeilleure.Instructions
|
|
{
|
|
static partial class InstEmit32
|
|
{
|
|
private static int FlipVdBits(int vd, bool lowBit)
|
|
{
|
|
if (lowBit)
|
|
{
|
|
// Move the low bit to the top.
|
|
return ((vd & 0x1) << 4) | (vd >> 1);
|
|
}
|
|
else
|
|
{
|
|
// Move the high bit to the bottom.
|
|
return ((vd & 0xf) << 1) | (vd >> 4);
|
|
}
|
|
}
|
|
|
|
private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned)
|
|
{
|
|
MethodInfo info;
|
|
|
|
if (op1.Type == OperandType.FP64)
|
|
{
|
|
info = unsigned
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
|
|
}
|
|
else
|
|
{
|
|
info = unsigned
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32));
|
|
}
|
|
|
|
return context.Call(info, op1);
|
|
}
|
|
|
|
public static void Vcvt_V(ArmEmitterContext context)
|
|
{
|
|
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
|
|
|
|
bool unsigned = (op.Opc & 1) != 0;
|
|
bool toInteger = (op.Opc & 2) != 0;
|
|
OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
if (toInteger)
|
|
{
|
|
if (Optimizations.UseAdvSimd)
|
|
{
|
|
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV);
|
|
}
|
|
else if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
|
|
}
|
|
else
|
|
{
|
|
EmitVectorUnaryOpF32(context, (op1) =>
|
|
{
|
|
return EmitSaturateFloatToInt(context, op1, unsigned);
|
|
});
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitVectorUnaryOpSimd32(context, (n) =>
|
|
{
|
|
if (unsigned)
|
|
{
|
|
Operand mask = X86GetAllElements(context, 0x47800000);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
|
|
res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
|
|
|
|
Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
|
|
res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
|
|
res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
|
|
|
|
return context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
|
|
}
|
|
else
|
|
{
|
|
return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
|
|
}
|
|
});
|
|
}
|
|
else
|
|
{
|
|
if (unsigned)
|
|
{
|
|
EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
|
|
}
|
|
else
|
|
{
|
|
EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
public static void Vcvt_FD(ArmEmitterContext context)
|
|
{
|
|
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
|
|
|
|
int vm = op.Vm;
|
|
int vd;
|
|
if (op.Size == 3)
|
|
{
|
|
vd = FlipVdBits(op.Vd, false);
|
|
// Double to single.
|
|
Operand fp = ExtractScalar(context, OperandType.FP64, vm);
|
|
|
|
Operand res = context.ConvertToFP(OperandType.FP32, fp);
|
|
|
|
InsertScalar(context, vd, res);
|
|
}
|
|
else
|
|
{
|
|
vd = FlipVdBits(op.Vd, true);
|
|
// Single to double.
|
|
Operand fp = ExtractScalar(context, OperandType.FP32, vm);
|
|
|
|
Operand res = context.ConvertToFP(OperandType.FP64, fp);
|
|
|
|
InsertScalar(context, vd, res);
|
|
}
|
|
}
|
|
|
|
// VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point).
|
|
public static void Vcvt_FI(ArmEmitterContext context)
|
|
{
|
|
OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
|
|
|
|
bool toInteger = (op.Opc2 & 0b100) != 0;
|
|
|
|
OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
if (toInteger)
|
|
{
|
|
bool unsigned = (op.Opc2 & 1) == 0;
|
|
bool roundWithFpscr = op.Opc != 1;
|
|
|
|
if (!roundWithFpscr && Optimizations.UseAdvSimd)
|
|
{
|
|
bool doubleSize = floatSize == OperandType.FP64;
|
|
|
|
if (doubleSize)
|
|
{
|
|
Operand m = GetVecA32(op.Vm >> 1);
|
|
|
|
Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, doubleSize);
|
|
|
|
Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble;
|
|
|
|
Operand asInteger = context.AddIntrinsicInt(inst, toConvert);
|
|
|
|
InsertScalar(context, op.Vd, asInteger);
|
|
}
|
|
else
|
|
{
|
|
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS);
|
|
}
|
|
}
|
|
else if (!roundWithFpscr && Optimizations.UseSse41)
|
|
{
|
|
EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
|
|
}
|
|
else
|
|
{
|
|
Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
|
|
|
|
// TODO: Fast Path.
|
|
if (roundWithFpscr)
|
|
{
|
|
toConvert = EmitRoundByRMode(context, toConvert);
|
|
}
|
|
|
|
// Round towards zero.
|
|
Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
|
|
|
|
InsertScalar(context, op.Vd, asInteger);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
bool unsigned = op.Opc == 0;
|
|
|
|
Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm);
|
|
|
|
Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned);
|
|
|
|
InsertScalar(context, op.Vd, asFloat);
|
|
}
|
|
}
|
|
|
|
private static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
|
|
{
|
|
IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
|
|
|
|
string name = nameof(Math.Round);
|
|
|
|
MethodInfo info = (op.Size & 1) == 0
|
|
? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
|
|
: typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });
|
|
|
|
return context.Call(info, n, Const((int)roundMode));
|
|
}
|
|
|
|
private static FPRoundingMode RMToRoundMode(int rm)
|
|
{
|
|
FPRoundingMode roundMode;
|
|
switch (rm)
|
|
{
|
|
case 0b00:
|
|
roundMode = FPRoundingMode.ToNearestAway;
|
|
break;
|
|
case 0b01:
|
|
roundMode = FPRoundingMode.ToNearest;
|
|
break;
|
|
case 0b10:
|
|
roundMode = FPRoundingMode.TowardsPlusInfinity;
|
|
break;
|
|
case 0b11:
|
|
roundMode = FPRoundingMode.TowardsMinusInfinity;
|
|
break;
|
|
default:
|
|
throw new ArgumentOutOfRangeException(nameof(rm));
|
|
}
|
|
return roundMode;
|
|
}
|
|
|
|
// VCVTA/M/N/P (floating-point).
|
|
public static void Vcvt_RM(ArmEmitterContext context)
|
|
{
|
|
OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1).
|
|
|
|
OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
bool unsigned = op.Opc == 0;
|
|
int rm = op.Opc2 & 3;
|
|
|
|
Intrinsic inst;
|
|
|
|
if (Optimizations.UseAdvSimd)
|
|
{
|
|
if (unsigned)
|
|
{
|
|
inst = rm switch {
|
|
0b00 => Intrinsic.Arm64FcvtauS,
|
|
0b01 => Intrinsic.Arm64FcvtnuS,
|
|
0b10 => Intrinsic.Arm64FcvtpuS,
|
|
0b11 => Intrinsic.Arm64FcvtmuS,
|
|
_ => throw new ArgumentOutOfRangeException(nameof(rm))
|
|
};
|
|
}
|
|
else
|
|
{
|
|
inst = rm switch {
|
|
0b00 => Intrinsic.Arm64FcvtasS,
|
|
0b01 => Intrinsic.Arm64FcvtnsS,
|
|
0b10 => Intrinsic.Arm64FcvtpsS,
|
|
0b11 => Intrinsic.Arm64FcvtmsS,
|
|
_ => throw new ArgumentOutOfRangeException(nameof(rm))
|
|
};
|
|
}
|
|
|
|
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
|
|
}
|
|
else if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
|
|
}
|
|
else
|
|
{
|
|
Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
|
|
|
|
switch (rm)
|
|
{
|
|
case 0b00: // Away
|
|
toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
|
|
break;
|
|
case 0b01: // Nearest
|
|
toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
|
|
break;
|
|
case 0b10: // Towards positive infinity
|
|
toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
|
|
break;
|
|
case 0b11: // Towards negative infinity
|
|
toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
|
|
break;
|
|
}
|
|
|
|
Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
|
|
|
|
InsertScalar(context, op.Vd, asInteger);
|
|
}
|
|
}
|
|
|
|
public static void Vcvt_TB(ArmEmitterContext context)
|
|
{
|
|
OpCode32SimdCvtTB op = (OpCode32SimdCvtTB)context.CurrOp;
|
|
|
|
if (Optimizations.UseF16c)
|
|
{
|
|
Debug.Assert(!Optimizations.ForceLegacySse);
|
|
|
|
if (op.Op)
|
|
{
|
|
Operand res = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);
|
|
if (op.Size == 1)
|
|
{
|
|
res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), res);
|
|
}
|
|
res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
|
|
res = context.VectorExtract16(res, 0);
|
|
InsertScalar16(context, op.Vd, op.T, res);
|
|
}
|
|
else
|
|
{
|
|
Operand res = context.VectorCreateScalar(ExtractScalar16(context, op.Vm, op.T));
|
|
res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
|
|
if (op.Size == 1)
|
|
{
|
|
res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
|
|
}
|
|
res = context.VectorExtract(op.Size == 1 ? OperandType.I64 : OperandType.I32, res, 0);
|
|
InsertScalar(context, op.Vd, res);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (op.Op)
|
|
{
|
|
// Convert to half.
|
|
|
|
Operand src = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);
|
|
|
|
MethodInfo method = op.Size == 1
|
|
? typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert))
|
|
: typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert));
|
|
|
|
context.ExitArmFpMode();
|
|
context.StoreToContext();
|
|
Operand res = context.Call(method, src);
|
|
context.LoadFromContext();
|
|
context.EnterArmFpMode();
|
|
|
|
InsertScalar16(context, op.Vd, op.T, res);
|
|
}
|
|
else
|
|
{
|
|
// Convert from half.
|
|
|
|
Operand src = ExtractScalar16(context, op.Vm, op.T);
|
|
|
|
MethodInfo method = op.Size == 1
|
|
? typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert))
|
|
: typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert));
|
|
|
|
context.ExitArmFpMode();
|
|
context.StoreToContext();
|
|
Operand res = context.Call(method, src);
|
|
context.LoadFromContext();
|
|
context.EnterArmFpMode();
|
|
|
|
InsertScalar(context, op.Vd, res);
|
|
}
|
|
}
|
|
}
|
|
|
|
// VRINTA/M/N/P (floating-point).
|
|
public static void Vrint_RM(ArmEmitterContext context)
|
|
{
|
|
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
|
|
|
|
OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
int rm = op.Opc2 & 3;
|
|
|
|
if (Optimizations.UseAdvSimd)
|
|
{
|
|
Intrinsic inst = rm switch {
|
|
0b00 => Intrinsic.Arm64FrintaS,
|
|
0b01 => Intrinsic.Arm64FrintnS,
|
|
0b10 => Intrinsic.Arm64FrintpS,
|
|
0b11 => Intrinsic.Arm64FrintmS,
|
|
_ => throw new ArgumentOutOfRangeException(nameof(rm))
|
|
};
|
|
|
|
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
|
|
}
|
|
else if (Optimizations.UseSse41)
|
|
{
|
|
EmitScalarUnaryOpSimd32(context, (m) =>
|
|
{
|
|
FPRoundingMode roundMode = RMToRoundMode(rm);
|
|
|
|
if (roundMode != FPRoundingMode.ToNearestAway)
|
|
{
|
|
Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
|
|
return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
|
|
}
|
|
else
|
|
{
|
|
return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: true);
|
|
}
|
|
});
|
|
}
|
|
else
|
|
{
|
|
Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
|
|
|
|
switch (rm)
|
|
{
|
|
case 0b00: // Away
|
|
toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
|
|
break;
|
|
case 0b01: // Nearest
|
|
toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
|
|
break;
|
|
case 0b10: // Towards positive infinity
|
|
toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
|
|
break;
|
|
case 0b11: // Towards negative infinity
|
|
toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
|
|
break;
|
|
}
|
|
|
|
InsertScalar(context, op.Vd, toConvert);
|
|
}
|
|
}
|
|
|
|
// VRINTA (vector).
|
|
public static void Vrinta_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseAdvSimd)
|
|
{
|
|
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS);
|
|
}
|
|
else
|
|
{
|
|
EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
|
|
}
|
|
}
|
|
|
|
// VRINTM (vector).
|
|
public static void Vrintm_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseAdvSimd)
|
|
{
|
|
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS);
|
|
}
|
|
else if (Optimizations.UseSse2)
|
|
{
|
|
EmitVectorUnaryOpSimd32(context, (m) =>
|
|
{
|
|
return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity)));
|
|
});
|
|
}
|
|
else
|
|
{
|
|
EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m));
|
|
}
|
|
}
|
|
|
|
// VRINTN (vector).
|
|
public static void Vrintn_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseAdvSimd)
|
|
{
|
|
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS);
|
|
}
|
|
else if (Optimizations.UseSse2)
|
|
{
|
|
EmitVectorUnaryOpSimd32(context, (m) =>
|
|
{
|
|
return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
|
|
});
|
|
}
|
|
else
|
|
{
|
|
EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m));
|
|
}
|
|
}
|
|
|
|
// VRINTP (vector).
|
|
public static void Vrintp_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseAdvSimd)
|
|
{
|
|
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS);
|
|
}
|
|
else if (Optimizations.UseSse2)
|
|
{
|
|
EmitVectorUnaryOpSimd32(context, (m) =>
|
|
{
|
|
return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity)));
|
|
});
|
|
}
|
|
else
|
|
{
|
|
EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m));
|
|
}
|
|
}
|
|
|
|
// VRINTZ (floating-point).
|
|
public static void Vrint_Z(ArmEmitterContext context)
|
|
{
|
|
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
|
|
|
|
if (Optimizations.UseAdvSimd)
|
|
{
|
|
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS);
|
|
}
|
|
else if (Optimizations.UseSse2)
|
|
{
|
|
EmitScalarUnaryOpSimd32(context, (m) =>
|
|
{
|
|
Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
|
|
return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero)));
|
|
});
|
|
}
|
|
else
|
|
{
|
|
EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Truncate), op1));
|
|
}
|
|
}
|
|
|
|
// VRINTX (floating-point).
|
|
public static void Vrintx_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseAdvSimd)
|
|
{
|
|
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintxS);
|
|
}
|
|
else
|
|
{
|
|
EmitScalarUnaryOpF32(context, (op1) =>
|
|
{
|
|
return EmitRoundByRMode(context, op1);
|
|
});
|
|
}
|
|
}
|
|
|
|
private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
|
|
|
|
if (signed)
|
|
{
|
|
return context.ConvertToFP(type, value);
|
|
}
|
|
else
|
|
{
|
|
return context.ConvertToFPUI(type, value);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
|
|
{
|
|
// A port of the similar round function in InstEmitSimdCvt.
|
|
OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
|
|
|
|
bool doubleSize = (op.Size & 1) != 0;
|
|
int shift = doubleSize ? 1 : 2;
|
|
Operand n = GetVecA32(op.Vm >> shift);
|
|
n = EmitSwapScalar(context, n, op.Vm, doubleSize);
|
|
|
|
if (!doubleSize)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (roundMode != FPRoundingMode.ToNearestAway)
|
|
{
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
|
|
}
|
|
else
|
|
{
|
|
nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
|
|
}
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp;
|
|
Operand nIntOrLong2 = default;
|
|
|
|
if (!signed)
|
|
{
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
}
|
|
|
|
int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
|
|
|
|
if (!signed)
|
|
{
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
Operand dRes;
|
|
if (signed)
|
|
{
|
|
dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
|
|
}
|
|
else
|
|
{
|
|
dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
|
|
dRes = context.Add(dRes, nIntOrLong);
|
|
}
|
|
|
|
InsertScalar(context, op.Vd, dRes);
|
|
}
|
|
else
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (roundMode != FPRoundingMode.ToNearestAway)
|
|
{
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
}
|
|
else
|
|
{
|
|
nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
|
|
}
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp;
|
|
Operand nIntOrLong2 = default;
|
|
|
|
if (!signed)
|
|
{
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
}
|
|
|
|
long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
|
|
|
|
if (!signed)
|
|
{
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
|
|
nLong = context.ConvertI64ToI32(nLong);
|
|
|
|
Operand dRes;
|
|
if (signed)
|
|
{
|
|
dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
|
|
}
|
|
else
|
|
{
|
|
dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
|
|
dRes = context.Add(dRes, nIntOrLong);
|
|
}
|
|
|
|
InsertScalar(context, op.Vd, dRes);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
|
|
{
|
|
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
|
|
|
|
EmitVectorUnaryOpSimd32(context, (n) =>
|
|
{
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
Operand nCmp;
|
|
if (!signed)
|
|
{
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
}
|
|
|
|
Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
|
Operand nInt2 = default;
|
|
|
|
if (!signed)
|
|
{
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
if (signed)
|
|
{
|
|
return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
|
|
}
|
|
else
|
|
{
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
|
|
return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
|
|
}
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
Operand nCmp;
|
|
if (!signed)
|
|
{
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
}
|
|
|
|
Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
|
|
|
|
Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
|
|
Operand nLong2 = default;
|
|
|
|
if (!signed)
|
|
{
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
if (signed)
|
|
{
|
|
return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
|
|
}
|
|
else
|
|
{
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
|
|
return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
|
|
}
|
|
}
|
|
});
|
|
}
|
|
}
|
|
}
|