forked from Mirror/Ryujinx
22b2cb39af
* Turn `MemoryOperand` into a struct * Remove `IntrinsicOperation` * Remove `PhiNode` * Remove `Node` * Turn `Operand` into a struct * Turn `Operation` into a struct * Clean up pool management methods * Add `Arena` allocator * Move `OperationHelper` to `Operation.Factory` * Move `OperandHelper` to `Operand.Factory` * Optimize `Operation` a bit * Fix `Arena` initialization * Rename `NativeList<T>` to `ArenaList<T>` * Reduce `Operand` size from 88 to 56 bytes * Reduce `Operation` size from 56 to 40 bytes * Add optimistic interning of Register & Constant operands * Optimize `RegisterUsage` pass a bit * Optimize `RemoveUnusedNodes` pass a bit Iterating in reverse-order allows killing dependency chains in a single pass. * Fix PPTC symbols * Optimize `BasicBlock` a bit Reduce allocations from `_successor` & `DominanceFrontiers` * Fix `Operation` resize * Make `Arena` expandable Change the arena allocator to be expandable by allocating in pages, with some of them being pooled. Currently 32 pages are pooled. An LRU removal mechanism should probably be added to it. Apparently MHR can allocate bitmaps large enough to exceed the 16MB limit for the type. * Move `Arena` & `ArenaList` to `Common` * Remove `ThreadStaticPool` & co * Add `PhiOperation` * Reduce `Operand` size from 56 from 48 bytes * Add linear-probing to `Operand` intern table * Optimize `HybridAllocator` a bit * Add `Allocators` class * Tune `ArenaAllocator` sizes * Add page removal mechanism to `ArenaAllocator` Remove pages which have not been used for more than 5s after each reset. I am on fence if this would be better using a Gen2 callback object like the one in System.Buffers.ArrayPool<T>, to trim the pool. Because right now if a large translation happens, the pages will be freed only after a reset. This reset may not happen for a while because no new translation is hit, but the arena base sizes are rather small. * Fix `OOM` when allocating larger than page size in `ArenaAllocator` Tweak resizing mechanism for Operand.Uses and Assignemnts. * Optimize `Optimizer` a bit * Optimize `Operand.Add<T>/Remove<T>` a bit * Clean up `PreAllocator` * Fix phi insertion order Reduce codegen diffs. * Fix code alignment * Use new heuristics for degree of parallelism * Suppress warnings * Address gdkchan's feedback Renamed `GetValue()` to `GetValueUnsafe()` to make it more clear that `Operand.Value` should usually not be modified directly. * Add fast path to `ArenaAllocator` * Assembly for `ArenaAllocator.Allocate(ulong)`: .L0: mov rax, [rcx+0x18] lea r8, [rax+rdx] cmp r8, [rcx+0x10] ja short .L2 .L1: mov rdx, [rcx+8] add rax, [rdx+8] mov [rcx+0x18], r8 ret .L2: jmp ArenaAllocator.AllocateSlow(UInt64) A few variable/field had to be changed to ulong so that RyuJIT avoids emitting zero-extends. * Implement a new heuristic to free pooled pages. If an arena is used often, it is more likely that its pages will be needed, so the pages are kept for longer (e.g: during PPTC rebuild or burst sof compilations). If is not used often, then it is more likely that its pages will not be needed (e.g: after PPTC rebuild or bursts of compilations). * Address riperiperi's feedback * Use `EqualityComparer<T>` in `IntrusiveList<T>` Avoids a potential GC hole in `Equals(T, T)`.
1546 lines
56 KiB
C#
1546 lines
56 KiB
C#
using ARMeilleure.Decoders;
|
|
using ARMeilleure.IntermediateRepresentation;
|
|
using ARMeilleure.State;
|
|
using ARMeilleure.Translation;
|
|
using System;
|
|
using System.Diagnostics;
|
|
using System.Reflection;
|
|
|
|
using static ARMeilleure.Instructions.InstEmitHelper;
|
|
using static ARMeilleure.Instructions.InstEmitSimdHelper;
|
|
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
|
|
|
|
namespace ARMeilleure.Instructions
|
|
{
|
|
using Func1I = Func<Operand, Operand>;
|
|
|
|
static partial class InstEmit
|
|
{
|
|
public static void Fcvt_S(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
if (op.Size == 0 && op.Opc == 1) // Single -> Double.
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
|
|
|
|
Operand res = context.ConvertToFP(OperandType.FP64, ne);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
}
|
|
else if (op.Size == 1 && op.Opc == 0) // Double -> Single.
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
|
|
|
|
Operand res = context.ConvertToFP(OperandType.FP32, ne);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
}
|
|
else if (op.Size == 0 && op.Opc == 3) // Single -> Half.
|
|
{
|
|
if (Optimizations.UseF16c)
|
|
{
|
|
Debug.Assert(!Optimizations.ForceLegacySse);
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
|
|
res = context.AddIntrinsic(Intrinsic.X86Pslldq, res, Const(14)); // VectorZeroUpper112()
|
|
res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(14));
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
|
|
|
|
Operand res = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
|
|
|
|
res = context.ZeroExtend16(OperandType.I64, res);
|
|
|
|
context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
|
|
}
|
|
}
|
|
else if (op.Size == 3 && op.Opc == 0) // Half -> Single.
|
|
{
|
|
if (Optimizations.UseF16c)
|
|
{
|
|
Debug.Assert(!Optimizations.ForceLegacySse);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn));
|
|
res = context.VectorZeroUpper96(res);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
|
|
|
|
Operand res = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
}
|
|
else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
|
|
{
|
|
throw new NotImplementedException("Double-precision to half-precision.");
|
|
}
|
|
else if (op.Size == 3 && op.Opc == 1) // Double -> Half.
|
|
{
|
|
throw new NotImplementedException("Half-precision to double-precision.");
|
|
}
|
|
else // Invalid encoding.
|
|
{
|
|
Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}");
|
|
}
|
|
}
|
|
|
|
public static void Fcvtas_Gp(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
|
|
}
|
|
|
|
public static void Fcvtas_S(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true);
|
|
}
|
|
|
|
public static void Fcvtas_V(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false);
|
|
}
|
|
|
|
public static void Fcvtau_Gp(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
|
|
}
|
|
|
|
public static void Fcvtau_S(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true);
|
|
}
|
|
|
|
public static void Fcvtau_V(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false);
|
|
}
|
|
|
|
public static void Fcvtl_V(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (Optimizations.UseSse2 && sizeF == 1)
|
|
{
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
|
|
res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else if (Optimizations.UseF16c && sizeF == 0)
|
|
{
|
|
Debug.Assert(!Optimizations.ForceLegacySse);
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
|
|
res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = 4 >> sizeF;
|
|
|
|
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
if (sizeF == 0)
|
|
{
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1);
|
|
|
|
Operand e = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
|
|
|
|
res = context.VectorInsert(res, e, index);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index);
|
|
|
|
Operand e = context.ConvertToFP(OperandType.FP64, ne);
|
|
|
|
res = context.VectorInsert(res, e, index);
|
|
}
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtms_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtmu_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtn_V(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (Optimizations.UseSse2 && sizeF == 1)
|
|
{
|
|
Operand d = GetVec(op.Rd);
|
|
|
|
Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, GetVec(op.Rn));
|
|
nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
|
|
|
|
Operand res = context.VectorZeroUpper64(d);
|
|
res = context.AddIntrinsic(movInst, res, nInt);
|
|
|
|
context.Copy(d, res);
|
|
}
|
|
else if (Optimizations.UseF16c && sizeF == 0)
|
|
{
|
|
Debug.Assert(!Optimizations.ForceLegacySse);
|
|
|
|
Operand d = GetVec(op.Rd);
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
|
|
nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
|
|
|
|
Operand res = context.VectorZeroUpper64(d);
|
|
res = context.AddIntrinsic(movInst, res, nInt);
|
|
|
|
context.Copy(d, res);
|
|
}
|
|
else
|
|
{
|
|
OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
int elems = 4 >> sizeF;
|
|
|
|
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
|
|
|
Operand d = GetVec(op.Rd);
|
|
|
|
Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand e = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
|
|
|
|
e = context.ZeroExtend16(OperandType.I64, e);
|
|
|
|
res = EmitVectorInsert(context, res, e, part + index, 1);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand e = context.ConvertToFP(OperandType.FP32, ne);
|
|
|
|
res = context.VectorInsert(res, e, part + index);
|
|
}
|
|
}
|
|
|
|
context.Copy(d, res);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtns_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtns_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtnu_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtnu_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtps_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtpu_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => op1);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtzs_Gp_Fixed(context);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: true, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => op1);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtzu_Gp_Fixed(context);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: false, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Scvtf_Gp(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
res = context.SignExtend32(OperandType.I64, res);
|
|
}
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: true);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
res = context.SignExtend32(OperandType.I64, res);
|
|
}
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: true);
|
|
|
|
res = EmitI2fFBitsMul(context, res, op.FBits);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Scvtf_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2ScvtfOp(context, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: true, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Scvtf_S_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2ScvtfOp(context, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: true, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Scvtf_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2ScvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Scvtf_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2ScvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Ucvtf_Gp(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: false);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: false);
|
|
|
|
res = EmitI2fFBitsMul(context, res, op.FBits);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Ucvtf_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2UcvtfOp(context, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: false, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Ucvtf_S_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2UcvtfOp(context, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: false, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Ucvtf_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2UcvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Ucvtf_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2UcvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
private static void EmitFcvt(ArmEmitterContext context, Func1I emit, bool signed, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
int sizeF = op.Size & 1;
|
|
int sizeI = sizeF + 2;
|
|
|
|
OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, n, index);
|
|
|
|
Operand e = emit(ne);
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
|
|
|
|
e = context.Call(info, e);
|
|
|
|
e = context.ZeroExtend32(OperandType.I64, e);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
|
|
|
|
e = context.Call(info, e);
|
|
}
|
|
|
|
res = EmitVectorInsert(context, res, e, index, sizeI);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
int sizeF = op.Size & 1;
|
|
int sizeI = sizeF + 2;
|
|
|
|
OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
int fBits = GetFBits(context);
|
|
|
|
int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, n, index);
|
|
|
|
Operand e = EmitF2iFBitsMul(context, ne, fBits);
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
|
|
|
|
e = context.Call(info, e);
|
|
|
|
e = context.ZeroExtend32(OperandType.I64, e);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
|
|
|
|
e = context.Call(info, e);
|
|
}
|
|
|
|
res = EmitVectorInsert(context, res, e, index, sizeI);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
EmitFcvt___Gp(context, emit, signed: true);
|
|
}
|
|
|
|
private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
EmitFcvt___Gp(context, emit, signed: false);
|
|
}
|
|
|
|
private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
|
|
Operand res = signed
|
|
? EmitScalarFcvts(context, emit(ne), 0)
|
|
: EmitScalarFcvtu(context, emit(ne), 0);
|
|
|
|
SetIntOrZR(context, op.Rd, res);
|
|
}
|
|
|
|
private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
EmitFcvtz__Gp_Fixed(context, signed: true);
|
|
}
|
|
|
|
private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
EmitFcvtz__Gp_Fixed(context, signed: false);
|
|
}
|
|
|
|
private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
|
|
Operand res = signed
|
|
? EmitScalarFcvts(context, ne, op.FBits)
|
|
: EmitScalarFcvtu(context, ne, op.FBits);
|
|
|
|
SetIntOrZR(context, op.Rd, res);
|
|
}
|
|
|
|
private static void EmitCvtf(ArmEmitterContext context, bool signed, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int sizeF = op.Size & 1;
|
|
int sizeI = sizeF + 2;
|
|
|
|
int fBits = GetFBits(context);
|
|
|
|
int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI);
|
|
|
|
Operand e = EmitFPConvert(context, ne, sizeF, signed);
|
|
|
|
e = EmitI2fFBitsMul(context, e, fBits);
|
|
|
|
res = context.VectorInsert(res, e, index);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
private static int GetFBits(ArmEmitterContext context)
|
|
{
|
|
if (context.CurrOp is OpCodeSimdShImm op)
|
|
{
|
|
return GetImmShr(op);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
|
|
Debug.Assert((uint)size < 2);
|
|
|
|
OperandType type = size == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
if (signed)
|
|
{
|
|
return context.ConvertToFP(type, value);
|
|
}
|
|
else
|
|
{
|
|
return context.ConvertToFPUI(type, value);
|
|
}
|
|
}
|
|
|
|
private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
value = EmitF2iFBitsMul(context, value, fBits);
|
|
|
|
MethodInfo info;
|
|
|
|
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
|
|
}
|
|
else
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64));
|
|
}
|
|
|
|
return context.Call(info, value);
|
|
}
|
|
|
|
private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
value = EmitF2iFBitsMul(context, value, fBits);
|
|
|
|
MethodInfo info;
|
|
|
|
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32));
|
|
}
|
|
else
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
|
|
}
|
|
|
|
return context.Call(info, value);
|
|
}
|
|
|
|
private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
if (fBits == 0)
|
|
{
|
|
return value;
|
|
}
|
|
|
|
if (value.Type == OperandType.FP32)
|
|
{
|
|
return context.Multiply(value, ConstF(MathF.Pow(2f, fBits)));
|
|
}
|
|
else /* if (value.Type == OperandType.FP64) */
|
|
{
|
|
return context.Multiply(value, ConstF(Math.Pow(2d, fBits)));
|
|
}
|
|
}
|
|
|
|
private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
if (fBits == 0)
|
|
{
|
|
return value;
|
|
}
|
|
|
|
if (value.Type == OperandType.FP32)
|
|
{
|
|
return context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits)));
|
|
}
|
|
else /* if (value.Type == OperandType.FP64) */
|
|
{
|
|
return context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits)));
|
|
}
|
|
}
|
|
|
|
public static Operand EmitSse2CvtDoubleToInt64OpF(ArmEmitterContext context, Operand opF, bool scalar)
|
|
{
|
|
Debug.Assert(opF.Type == OperandType.V128);
|
|
|
|
Operand longL = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opF); // opFL
|
|
Operand res = context.VectorCreateScalar(longL);
|
|
|
|
if (!scalar)
|
|
{
|
|
Operand opFH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, opF); // res doesn't matter.
|
|
Operand longH = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opFH);
|
|
Operand resH = context.VectorCreateScalar(longH);
|
|
res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
private static Operand EmitSse2CvtInt64ToDoubleOp(ArmEmitterContext context, Operand op, bool scalar)
|
|
{
|
|
Debug.Assert(op.Type == OperandType.V128);
|
|
|
|
Operand longL = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, op); // opL
|
|
Operand res = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, context.VectorZero(), longL);
|
|
|
|
if (!scalar)
|
|
{
|
|
Operand opH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, op); // res doesn't matter.
|
|
Operand longH = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, opH);
|
|
Operand resH = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, res, longH); // res doesn't matter.
|
|
res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
private static void EmitSse2ScvtfOp(ArmEmitterContext context, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 - fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper96(res);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand res = EmitSse2CvtInt64ToDoubleOp(context, n, scalar);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse2UcvtfOp(ArmEmitterContext context, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand mask = scalar // 65536.000f (1 << 16)
|
|
? X86GetScalar (context, 0x47800000)
|
|
: X86GetAllElements(context, 0x47800000);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
|
|
res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
|
|
|
|
Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
|
|
res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
|
|
res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 - fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper96(res);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand mask = scalar // 4294967296.0000000d (1L << 32)
|
|
? X86GetScalar (context, 0x41F0000000000000L)
|
|
: X86GetAllElements(context, 0x41F0000000000000L);
|
|
|
|
Operand res = context.AddIntrinsic (Intrinsic.X86Psrlq, n, Const(32));
|
|
res = EmitSse2CvtInt64ToDoubleOp(context, res, scalar);
|
|
res = context.AddIntrinsic (Intrinsic.X86Mulpd, res, mask);
|
|
|
|
Operand res2 = context.AddIntrinsic (Intrinsic.X86Psllq, n, Const(32));
|
|
res2 = context.AddIntrinsic (Intrinsic.X86Psrlq, res2, Const(32));
|
|
res2 = EmitSse2CvtInt64ToDoubleOp(context, res2, scalar);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Addpd, res, res2);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41FcvtsOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 + fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
|
|
|
Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
|
|
? X86GetScalar (context, 0x4F000000)
|
|
: X86GetAllElements(context, 0x4F000000);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper96(dRes);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
|
|
|
|
Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
|
|
? X86GetScalar (context, 0x43E0000000000000L)
|
|
: X86GetAllElements(context, 0x43E0000000000000L);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41FcvtuOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 + fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
|
|
? X86GetScalar (context, 0x4F000000)
|
|
: X86GetAllElements(context, 0x4F000000);
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
|
|
dRes = context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper96(dRes);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
|
|
? X86GetScalar (context, 0x43E0000000000000L)
|
|
: X86GetAllElements(context, 0x43E0000000000000L);
|
|
|
|
Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nLong2 = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
|
|
dRes = context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41Fcvts_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
if (op.Size == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
|
|
int fpScaled = 0x3F800000 + op.FBits * 0x800000;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
|
|
|
|
int fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x4F000000 // 2.14748365E9f (2147483648)
|
|
: 0x5F000000; // 9.223372E18f (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int64)
|
|
{
|
|
nInt = context.SignExtend32(OperandType.I64, nInt);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
else /* if (op.Size == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
|
|
long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
|
|
|
|
long fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
|
|
: 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
nLong = context.ConvertI64ToI32(nLong);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41Fcvtu_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
if (op.Size == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
|
|
int fpScaled = 0x3F800000 + op.FBits * 0x800000;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
int fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x4F000000 // 2.14748365E9f (2147483648)
|
|
: 0x5F000000; // 9.223372E18f (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int64)
|
|
{
|
|
nInt = context.SignExtend32(OperandType.I64, nInt);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
|
|
dRes = context.Add(dRes, nIntOrLong);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
else /* if (op.Size == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
|
|
long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
long fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
|
|
: 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
nLong = context.ConvertI64ToI32(nLong);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
|
|
dRes = context.Add(dRes, nIntOrLong);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
}
|
|
|
|
private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size)
|
|
{
|
|
OperandType type = size == 3 ? OperandType.I64 : OperandType.I32;
|
|
|
|
return context.VectorExtract(type, GetVec(reg), index);
|
|
}
|
|
}
|
|
}
|