From eefe2b20fceb56da2b4507717108b80d935cfc2e Mon Sep 17 00:00:00 2001
From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
Date: Sun, 10 Nov 2019 03:21:03 +0100
Subject: [PATCH] Fix Fcmge_S/V & Fcmgt_S/V instructions (#815)

* Fix Fcmge_S/V & Fcmgt_S/V.

Follow up with the related Fcm**_S/V & Fc*mp*_S changes.
Improve CmpCondition enum.
Nits.

* Optimize Fccmp*_S & Fcmp*_S.

* Fix cvtsd2si opcode.

* Address PR feedback.
---
 ARMeilleure/CodeGen/X86/Assembler.cs          |  2 +-
 ARMeilleure/CodeGen/X86/CodeGenerator.cs      | 16 +++-
 .../CodeGen/X86/HardwareCapabilities.cs       |  2 +-
 ARMeilleure/CodeGen/X86/IntrinsicTable.cs     |  1 +
 ARMeilleure/Instructions/InstEmitSimdCmp.cs   | 77 ++++++++++---------
 ARMeilleure/Instructions/InstEmitSimdCvt.cs   | 34 ++++----
 .../Instructions/InstEmitSimdHelper.cs        | 15 ++++
 .../IntermediateRepresentation/Intrinsic.cs   |  1 +
 ARMeilleure/Optimizations.cs                  |  2 +
 .../Ryujinx.Tests.Unicorn.csproj              |  2 +-
 Ryujinx.Tests/Ryujinx.Tests.csproj            |  2 +-
 11 files changed, 92 insertions(+), 62 deletions(-)

diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs
index c64838945f..ee80d892b0 100644
--- a/ARMeilleure/CodeGen/X86/Assembler.cs
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -94,7 +94,7 @@ namespace ARMeilleure.CodeGen.X86
             Add(X86Instruction.Cvtpd2ps,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66));
             Add(X86Instruction.Cvtps2dq,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66));
             Add(X86Instruction.Cvtps2pd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex));
-            Add(X86Instruction.Cvtsd2si,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Cvtsd2si,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
             Add(X86Instruction.Cvtsd2ss,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
             Add(X86Instruction.Cvtsi2sd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
             Add(X86Instruction.Cvtsi2ss,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
index ae24b5631a..33fc2aee3e 100644
--- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs
+++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -265,7 +265,21 @@ namespace ARMeilleure.CodeGen.X86
 
                         Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger());
 
-                        context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
+                        if (intrinOp.Intrinsic == Intrinsic.X86Cvtsi2si)
+                        {
+                            if (dest.Type == OperandType.I32)
+                            {
+                                context.Assembler.Movd(dest, source); // int _mm_cvtsi128_si32
+                            }
+                            else /* if (dest.Type == OperandType.I64) */
+                            {
+                                context.Assembler.Movq(dest, source); // __int64 _mm_cvtsi128_si64
+                            }
+                        }
+                        else
+                        {
+                            context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
+                        }
 
                         break;
                     }
diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
index 7f930d6b9c..ed81482928 100644
--- a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
+++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -26,7 +26,7 @@ namespace ARMeilleure.CodeGen.X86
 
         public static bool ForceLegacySse { get; set; }
 
-        public static bool SupportsVexEncoding => !ForceLegacySse && SupportsAvx;
+        public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse;
 
         static HardwareCapabilities()
         {
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
index e225f2542c..73fb5fd1b6 100644
--- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -37,6 +37,7 @@ namespace ARMeilleure.CodeGen.X86
             Add(Intrinsic.X86Cvtps2pd,   new IntrinsicInfo(X86Instruction.Cvtps2pd,   IntrinsicType.Unary));
             Add(Intrinsic.X86Cvtsd2si,   new IntrinsicInfo(X86Instruction.Cvtsd2si,   IntrinsicType.UnaryToGpr));
             Add(Intrinsic.X86Cvtsd2ss,   new IntrinsicInfo(X86Instruction.Cvtsd2ss,   IntrinsicType.Binary));
+            Add(Intrinsic.X86Cvtsi2si,   new IntrinsicInfo(X86Instruction.Movd,       IntrinsicType.UnaryToGpr));
             Add(Intrinsic.X86Cvtss2sd,   new IntrinsicInfo(X86Instruction.Cvtss2sd,   IntrinsicType.Binary));
             Add(Intrinsic.X86Divpd,      new IntrinsicInfo(X86Instruction.Divpd,      IntrinsicType.Binary));
             Add(Intrinsic.X86Divps,      new IntrinsicInfo(X86Instruction.Divps,      IntrinsicType.Binary));
diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
index f27121bb33..ac1bffcb2e 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCmp.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
@@ -322,7 +322,7 @@ namespace ARMeilleure.Instructions
 
         public static void Fcmge_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAvx)
             {
                 EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true);
             }
@@ -334,7 +334,7 @@ namespace ARMeilleure.Instructions
 
         public static void Fcmge_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAvx)
             {
                 EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false);
             }
@@ -346,7 +346,7 @@ namespace ARMeilleure.Instructions
 
         public static void Fcmgt_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAvx)
             {
                 EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true);
             }
@@ -358,7 +358,7 @@ namespace ARMeilleure.Instructions
 
         public static void Fcmgt_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAvx)
             {
                 EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false);
             }
@@ -372,7 +372,7 @@ namespace ARMeilleure.Instructions
         {
             if (Optimizations.FastFP && Optimizations.UseSse2)
             {
-                EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, isLeOrLt: true);
+                EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: true);
             }
             else
             {
@@ -384,7 +384,7 @@ namespace ARMeilleure.Instructions
         {
             if (Optimizations.FastFP && Optimizations.UseSse2)
             {
-                EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, isLeOrLt: true);
+                EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: false);
             }
             else
             {
@@ -396,7 +396,7 @@ namespace ARMeilleure.Instructions
         {
             if (Optimizations.FastFP && Optimizations.UseSse2)
             {
-                EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true, isLeOrLt: true);
+                EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: true);
             }
             else
             {
@@ -408,7 +408,7 @@ namespace ARMeilleure.Instructions
         {
             if (Optimizations.FastFP && Optimizations.UseSse2)
             {
-                EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false, isLeOrLt: true);
+                EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: false);
             }
             else
             {
@@ -426,7 +426,7 @@ namespace ARMeilleure.Instructions
             EmitFcmpOrFcmpe(context, signalNaNs: true);
         }
 
-        public static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
+        private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
         {
             OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
 
@@ -435,7 +435,7 @@ namespace ARMeilleure.Instructions
 
             context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond));
 
-            EmitSetNzcv(context, Const(op.Nzcv));
+            EmitSetNzcv(context, op.Nzcv);
 
             context.Branch(lblEnd);
 
@@ -446,27 +446,47 @@ namespace ARMeilleure.Instructions
             context.MarkLabel(lblEnd);
         }
 
+        private static void EmitSetNzcv(ArmEmitterContext context, int nzcv) // Calls SetFlag for V, C, Z and N with bits 0, 1, 2 and 3 of the compile-time nzcv value.
+        {
+            Operand Extract(int value, int bit) // Returns bit number 'bit' of 'value' (0 or 1) wrapped in a constant operand.
+            {
+                if (bit != 0)
+                {
+                    value >>= bit; // Move the requested bit down to position 0.
+                }
+
+                value &= 1; // Discard every bit except position 0.
+
+                return Const(value);
+            }
+
+            SetFlag(context, PState.VFlag, Extract(nzcv, 0)); // V <- nzcv bit 0.
+            SetFlag(context, PState.CFlag, Extract(nzcv, 1)); // C <- nzcv bit 1.
+            SetFlag(context, PState.ZFlag, Extract(nzcv, 2)); // Z <- nzcv bit 2.
+            SetFlag(context, PState.NFlag, Extract(nzcv, 3)); // N <- nzcv bit 3.
+        }
+
         private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            const int cmpOrdered = 7;
-
             bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false;
 
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);
 
+                CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
+
                 Operand lblNaN = Label();
                 Operand lblEnd = Label();
 
                 if (op.Size == 0)
                 {
-                    Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const(cmpOrdered));
+                    Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
 
-                    Operand isOrdered = context.VectorExtract16(ordMask, 0);
+                    Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
 
                     context.BranchIfFalse(lblNaN, isOrdered);
 
@@ -481,9 +501,9 @@ namespace ARMeilleure.Instructions
                 }
                 else /* if (op.Size == 1) */
                 {
-                    Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const(cmpOrdered));
+                    Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
 
-                    Operand isOrdered = context.VectorExtract16(ordMask, 0);
+                    Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
 
                     context.BranchIfFalse(lblNaN, isOrdered);
 
@@ -653,18 +673,7 @@ namespace ARMeilleure.Instructions
             context.Copy(GetVec(op.Rd), res);
         }
 
-        private enum CmpCondition
-        {
-            Equal              = 0,
-            GreaterThanOrEqual = 5,
-            GreaterThan        = 6
-        }
-
-        private static void EmitCmpSseOrSse2OpF(
-            ArmEmitterContext context,
-            CmpCondition cond,
-            bool scalar,
-            bool isLeOrLt = false)
+        private static void EmitCmpSseOrSse2OpF(ArmEmitterContext context, CmpCondition cond, bool scalar)
         {
             OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -677,9 +686,7 @@ namespace ARMeilleure.Instructions
             {
                 Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
 
-                Operand res = isLeOrLt
-                    ? context.AddIntrinsic(inst, m, n, Const((int)cond))
-                    : context.AddIntrinsic(inst, n, m, Const((int)cond));
+                Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
 
                 if (scalar)
                 {
@@ -696,9 +703,7 @@ namespace ARMeilleure.Instructions
             {
                 Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
 
-                Operand res = isLeOrLt
-                    ? context.AddIntrinsic(inst, m, n, Const((int)cond))
-                    : context.AddIntrinsic(inst, n, m, Const((int)cond));
+                Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
 
                 if (scalar)
                 {
@@ -709,4 +714,4 @@ namespace ARMeilleure.Instructions
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
index 012bfcce2a..e2b6dbd74c 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
@@ -732,8 +732,7 @@ namespace ARMeilleure.Instructions
             Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
             Debug.Assert((uint)size < 2);
 
-            OperandType type = size == 0 ? OperandType.FP32
-                                         : OperandType.FP64;
+            OperandType type = size == 0 ? OperandType.FP32 : OperandType.FP64;
 
             if (signed)
             {
@@ -837,15 +836,12 @@ namespace ARMeilleure.Instructions
 
             Operand n = GetVec(op.Rn);
 
-            const int cmpGreaterThanOrEqual = 5;
-            const int cmpOrdered            = 7;
-
             // sizeF == ((OpCodeSimdShImm64)op).Size - 2
             int sizeF = op.Size & 1;
 
             if (sizeF == 0)
             {
-                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered));
+                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
 
                 Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
 
@@ -867,7 +863,7 @@ namespace ARMeilleure.Instructions
 
                 Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
 
-                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, mask, Const(cmpGreaterThanOrEqual));
+                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, mask, Const((int)CmpCondition.NotLessThan));
 
                 Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);
 
@@ -884,7 +880,7 @@ namespace ARMeilleure.Instructions
             }
             else /* if (sizeF == 1) */
             {
-                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered));
+                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
 
                 Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
 
@@ -920,7 +916,7 @@ namespace ARMeilleure.Instructions
 
                 Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
 
-                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, mask, Const(cmpGreaterThanOrEqual));
+                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, mask, Const((int)CmpCondition.NotLessThan));
 
                 Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);
 
@@ -939,16 +935,12 @@ namespace ARMeilleure.Instructions
 
             Operand n = GetVec(op.Rn);
 
-            const int cmpGreaterThanOrEqual = 5;
-            const int cmpGreaterThan        = 6;
-            const int cmpOrdered            = 7;
-
             // sizeF == ((OpCodeSimdShImm)op).Size - 2
             int sizeF = op.Size & 1;
 
             if (sizeF == 0)
             {
-                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered));
+                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
 
                 Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
 
@@ -966,7 +958,7 @@ namespace ARMeilleure.Instructions
 
                 Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode)));
 
-                Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, context.VectorZero(), Const(cmpGreaterThan));
+                Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
 
                 Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
 
@@ -976,13 +968,13 @@ namespace ARMeilleure.Instructions
 
                 Operand res = context.AddIntrinsic(Intrinsic.X86Subps, nRndMasked, mask);
 
-                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, res, context.VectorZero(), Const(cmpGreaterThan));
+                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, res, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
 
                 Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
 
                 res = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, resMasked);
 
-                Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmpps, resMasked, mask, Const(cmpGreaterThanOrEqual));
+                Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmpps, resMasked, mask, Const((int)CmpCondition.NotLessThan));
 
                 res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
                 res = context.AddIntrinsic(Intrinsic.X86Paddd, res, nInt);
@@ -1000,7 +992,7 @@ namespace ARMeilleure.Instructions
             }
             else /* if (sizeF == 1) */
             {
-                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered));
+                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
 
                 Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
 
@@ -1018,7 +1010,7 @@ namespace ARMeilleure.Instructions
 
                 Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode)));
 
-                Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, context.VectorZero(), Const(cmpGreaterThan));
+                Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
 
                 Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
 
@@ -1042,7 +1034,7 @@ namespace ARMeilleure.Instructions
 
                 Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, nRndMasked, mask);
 
-                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, res, context.VectorZero(), Const(cmpGreaterThan));
+                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, res, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
 
                 Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
 
@@ -1056,7 +1048,7 @@ namespace ARMeilleure.Instructions
 
                 res = EmitVectorLongCreate(context, low, high);
 
-                Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmppd, resMasked, mask, Const(cmpGreaterThanOrEqual));
+                Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmppd, resMasked, mask, Const((int)CmpCondition.NotLessThan));
 
                 res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
                 res = context.AddIntrinsic(Intrinsic.X86Paddq, res, nInt);
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
index a3da80fb0d..f0880079e4 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -1108,6 +1108,21 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public enum CmpCondition // Cast to int and passed as the predicate constant of the X86Cmpss/Cmpps/Cmpsd/Cmppd intrinsics.
+        {
+            // Legacy Sse (predicates 0-7): encodable without a VEX prefix.
+            Equal              = 0, // EQ_OQ: ordered, non-signaling.
+            LessThan           = 1, // LT_OS: ordered, signaling.
+            LessThanOrEqual    = 2, // LE_OS: ordered, signaling.
+            NotLessThan        = 5, // NLT_US: unordered (also true when an operand is NaN), signaling.
+            NotLessThanOrEqual = 6, // NLE_US: unordered (also true when an operand is NaN), signaling.
+            OrderedQ           = 7, // ORD_Q: true when neither operand is NaN, non-signaling.
+
+            // Vex (predicates 8-31): only encodable with a VEX prefix, i.e. they need Avx.
+            GreaterThanOrEqual = 13, // GE_OS: ordered, signaling.
+            GreaterThan        = 14, // GT_OS: ordered, signaling.
+            OrderedS           = 23  // ORD_S: true when neither operand is NaN, signaling.
+        }
 
         [Flags]
         public enum SaturatingFlags
diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
index 1fe29e8558..e2d3c6dbaf 100644
--- a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
+++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
@@ -26,6 +26,7 @@ namespace ARMeilleure.IntermediateRepresentation
         X86Cvtps2pd,
         X86Cvtsd2si,
         X86Cvtsd2ss,
+        X86Cvtsi2si,
         X86Cvtss2sd,
         X86Divpd,
         X86Divps,
diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs
index 0b9885dc95..28af0936c8 100644
--- a/ARMeilleure/Optimizations.cs
+++ b/ARMeilleure/Optimizations.cs
@@ -15,6 +15,7 @@ namespace ARMeilleure
         public static bool UseSse41IfAvailable  { get; set; } = true;
         public static bool UseSse42IfAvailable  { get; set; } = true;
         public static bool UsePopCntIfAvailable { get; set; } = true;
+        public static bool UseAvxIfAvailable    { get; set; } = true;
 
         public static bool ForceLegacySse
         {
@@ -29,5 +30,6 @@ namespace ARMeilleure
         internal static bool UseSse41  => UseSse41IfAvailable  && HardwareCapabilities.SupportsSse41;
         internal static bool UseSse42  => UseSse42IfAvailable  && HardwareCapabilities.SupportsSse42;
         internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
+        internal static bool UseAvx    => UseAvxIfAvailable    && HardwareCapabilities.SupportsAvx && !ForceLegacySse;
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj b/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj
index f01e8ced5f..36310f3d2a 100644
--- a/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj
+++ b/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj
@@ -22,7 +22,7 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.3.0" />
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.4.0" />
   </ItemGroup>
 
 </Project>
diff --git a/Ryujinx.Tests/Ryujinx.Tests.csproj b/Ryujinx.Tests/Ryujinx.Tests.csproj
index 143c4cec84..83ec2e9647 100644
--- a/Ryujinx.Tests/Ryujinx.Tests.csproj
+++ b/Ryujinx.Tests/Ryujinx.Tests.csproj
@@ -27,7 +27,7 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.3.0" />
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.4.0" />
     <PackageReference Include="NUnit" Version="3.12.0" />
     <PackageReference Include="NUnit3TestAdapter" Version="3.15.1" />
   </ItemGroup>