From e674b377104858d5068231dbe395e1038ba5d71d Mon Sep 17 00:00:00 2001
From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
Date: Tue, 23 Oct 2018 16:12:45 +0200
Subject: [PATCH] Fix Fcvtl_V and Fcvtn_V; fix half to float conv. and add
 float to half conv. (full FP emu.). Add 4 FP Tests. (#468)

* Update CpuTest.cs

* Update CpuTestSimd.cs

* Superseded.

* Update AInstEmitSimdCvt.cs

* Update ASoftFloat.cs

* Nit.

* Update PackageReferences.

* Update AInstEmitSimdArithmetic.cs

* Update AVectorHelper.cs

* Update ASoftFloat.cs

* Update ASoftFallback.cs

* Update AThreadState.cs

* Create FPType.cs

* Create FPExc.cs

* Create FPCR.cs

* Create FPSR.cs

* Update ARoundMode.cs

* Update APState.cs

* Avoid an unwanted implicit promotion of the >= operands to long, while still rejecting negative values. Remove a leftover.

* Nits.
---
 .../Instruction/AInstEmitSimdArithmetic.cs    |   8 -
 ChocolArm64/Instruction/AInstEmitSimdCmp.cs   |  17 +-
 ChocolArm64/Instruction/AInstEmitSimdCvt.cs   |  52 +-
 .../Instruction/AInstEmitSimdHelper.cs        |  10 +-
 ChocolArm64/Instruction/ASoftFallback.cs      |  39 +-
 ChocolArm64/Instruction/ASoftFloat.cs         | 606 +++++++++++++++---
 ChocolArm64/Instruction/AVectorHelper.cs      |   8 +-
 ChocolArm64/State/APState.cs                  |   4 +-
 ChocolArm64/State/ARoundMode.cs               |   4 +-
 ChocolArm64/State/AThreadState.cs             |  17 +-
 ChocolArm64/State/FPCR.cs                     |  11 +
 ChocolArm64/State/FPExc.cs                    |  12 +
 ChocolArm64/State/FPSR.cs                     |   8 +
 ChocolArm64/State/FPType.cs                   |  11 +
 Ryujinx.Tests/Cpu/CpuTest.cs                  |  42 +-
 Ryujinx.Tests/Cpu/CpuTestSimd.cs              | 167 +++++
 Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs           |  43 --
 Ryujinx.Tests/Ryujinx.Tests.csproj            |   4 +-
 18 files changed, 863 insertions(+), 200 deletions(-)
 create mode 100644 ChocolArm64/State/FPCR.cs
 create mode 100644 ChocolArm64/State/FPExc.cs
 create mode 100644 ChocolArm64/State/FPSR.cs
 create mode 100644 ChocolArm64/State/FPType.cs
 delete mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs

diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index 7ba08f5e22..5a5e50f2b2 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -835,8 +835,6 @@ namespace ChocolArm64.Instruction
             {
                 Context.EmitLdarg(ATranslatedSub.StateArgIdx);
 
-                Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpcr));
-
                 if (Op.Size == 0)
                 {
                     AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
@@ -862,8 +860,6 @@ namespace ChocolArm64.Instruction
             {
                 Context.EmitLdarg(ATranslatedSub.StateArgIdx);
 
-                Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpcr));
-
                 if (SizeF == 0)
                 {
                     AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
@@ -938,8 +934,6 @@ namespace ChocolArm64.Instruction
             {
                 Context.EmitLdarg(ATranslatedSub.StateArgIdx);
 
-                Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpcr));
-
                 if (Op.Size == 0)
                 {
                     AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
@@ -963,8 +957,6 @@ namespace ChocolArm64.Instruction
             {
                 Context.EmitLdarg(ATranslatedSub.StateArgIdx);
 
-                Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpcr));
-
                 if (Op.Size == 0)
                 {
                     AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
index 97f7623fa3..cd3480e649 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
@@ -284,11 +284,11 @@ namespace ChocolArm64.Instruction
                 {
                     if (Op.Size == 0)
                     {
-                        Context.EmitLdc_R4(0);
+                        Context.EmitLdc_R4(0f);
                     }
-                    else /* if (SizeF == 1) */
+                    else /* if (Op.Size == 1) */
                     {
-                        Context.EmitLdc_R8(0);
+                        Context.EmitLdc_R8(0d);
                     }
                 }
                 else
@@ -378,7 +378,7 @@ namespace ChocolArm64.Instruction
                 }
                 else
                 {
-                    Context.EmitLdc_I8(0);
+                    Context.EmitLdc_I8(0L);
                 }
 
                 AILLabel LblTrue = new AILLabel();
@@ -422,7 +422,7 @@ namespace ChocolArm64.Instruction
 
                 Context.Emit(OpCodes.And);
 
-                Context.EmitLdc_I8(0);
+                Context.EmitLdc_I8(0L);
 
                 Context.Emit(OpCodes.Bne_Un_S, LblTrue);
 
@@ -455,8 +455,9 @@ namespace ChocolArm64.Instruction
             int SizeF = Op.Size & 1;
 
             int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> SizeF + 2;
 
-            for (int Index = 0; Index < Bytes >> SizeF + 2; Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 EmitFcmp(Context, ILOp, Index, Scalar: false);
             }
@@ -483,11 +484,11 @@ namespace ChocolArm64.Instruction
             }
             else if (SizeF == 0)
             {
-                Context.EmitLdc_R4(0);
+                Context.EmitLdc_R4(0f);
             }
             else /* if (SizeF == 1) */
             {
-                Context.EmitLdc_R8(0);
+                Context.EmitLdc_R8(0d);
             }
 
             AILLabel LblTrue = new AILLabel();
diff --git a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs
index 76d984a23b..f277069bff 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs
@@ -78,7 +78,7 @@ namespace ChocolArm64.Instruction
 
             int Elems = 4 >> SizeF;
 
-            int Part = Context.CurrOp.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
+            int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
 
             for (int Index = 0; Index < Elems; Index++)
             {
@@ -87,7 +87,9 @@ namespace ChocolArm64.Instruction
                     EmitVectorExtractZx(Context, Op.Rn, Part + Index, 1);
                     Context.Emit(OpCodes.Conv_U2);
 
-                    Context.EmitCall(typeof(ASoftFloat), nameof(ASoftFloat.ConvertHalfToSingle));
+                    Context.EmitLdarg(ATranslatedSub.StateArgIdx);
+
+                    Context.EmitCall(typeof(ASoftFloat16_32), nameof(ASoftFloat16_32.FPConvert));
                 }
                 else /* if (SizeF == 1) */
                 {
@@ -96,8 +98,11 @@ namespace ChocolArm64.Instruction
                     Context.Emit(OpCodes.Conv_R8);
                 }
 
-                EmitVectorInsertF(Context, Op.Rd, Index, SizeF);
+                EmitVectorInsertTmpF(Context, Index, SizeF);
             }
+
+            Context.EmitLdvectmp();
+            Context.EmitStvec(Op.Rd);
         }
 
         public static void Fcvtms_Gp(AILEmitterCtx Context)
@@ -118,28 +123,39 @@ namespace ChocolArm64.Instruction
 
             int Elems = 4 >> SizeF;
 
-            int Part = Context.CurrOp.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
+            int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
+
+            if (Part != 0)
+            {
+                Context.EmitLdvec(Op.Rd);
+                Context.EmitStvectmp();
+            }
 
             for (int Index = 0; Index < Elems; Index++)
             {
-                EmitVectorExtractF(Context, Op.Rd, Index, SizeF);
+                EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
 
                 if (SizeF == 0)
                 {
-                    //TODO: This need the half precision floating point type,
-                    //that is not yet supported on .NET. We should probably
-                    //do our own implementation on the meantime.
-                    throw new NotImplementedException();
+                    Context.EmitLdarg(ATranslatedSub.StateArgIdx);
+
+                    Context.EmitCall(typeof(ASoftFloat32_16), nameof(ASoftFloat32_16.FPConvert));
+
+                    Context.Emit(OpCodes.Conv_U8);
+                    EmitVectorInsertTmp(Context, Part + Index, 1);
                 }
                 else /* if (SizeF == 1) */
                 {
                     Context.Emit(OpCodes.Conv_R4);
 
-                    EmitVectorInsertF(Context, Op.Rd, Part + Index, 0);
+                    EmitVectorInsertTmpF(Context, Part + Index, 0);
                 }
             }
 
-            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            Context.EmitLdvectmp();
+            Context.EmitStvec(Op.Rd);
+
+            if (Part == 0)
             {
                 EmitVectorZeroUpper(Context, Op.Rd);
             }
@@ -445,8 +461,9 @@ namespace ChocolArm64.Instruction
             int FBits = GetFBits(Context);
 
             int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> SizeI;
 
-            for (int Index = 0; Index < (Bytes >> SizeI); Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 EmitVectorExtract(Context, Op.Rn, Index, SizeI, Signed);
 
@@ -534,8 +551,9 @@ namespace ChocolArm64.Instruction
             int FBits = GetFBits(Context);
 
             int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> SizeI;
 
-            for (int Index = 0; Index < (Bytes >> SizeI); Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
 
@@ -640,11 +658,11 @@ namespace ChocolArm64.Instruction
             {
                 if (Size == 0)
                 {
-                    Context.EmitLdc_R4(MathF.Pow(2, FBits));
+                    Context.EmitLdc_R4(MathF.Pow(2f, FBits));
                 }
                 else if (Size == 1)
                 {
-                    Context.EmitLdc_R8(Math.Pow(2, FBits));
+                    Context.EmitLdc_R8(Math.Pow(2d, FBits));
                 }
                 else
                 {
@@ -661,11 +679,11 @@ namespace ChocolArm64.Instruction
             {
                 if (Size == 0)
                 {
-                    Context.EmitLdc_R4(1f / MathF.Pow(2, FBits));
+                    Context.EmitLdc_R4(1f / MathF.Pow(2f, FBits));
                 }
                 else if (Size == 1)
                 {
-                    Context.EmitLdc_R8(1 / Math.Pow(2, FBits));
+                    Context.EmitLdc_R8(1d / Math.Pow(2d, FBits));
                 }
                 else
                 {
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
index dd39f52d50..ff08283120 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@@ -1274,8 +1274,6 @@ namespace ChocolArm64.Instruction
         {
             ThrowIfInvalid(Index, Size);
 
-            IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;
-
             Context.EmitLdvec(Reg);
             Context.EmitLdc_I4(Index);
             Context.EmitLdc_I4(Size);
@@ -1470,12 +1468,12 @@ namespace ChocolArm64.Instruction
 
         private static void ThrowIfInvalid(int Index, int Size)
         {
-            if ((uint)Size > 3)
+            if ((uint)Size > 3u)
             {
                 throw new ArgumentOutOfRangeException(nameof(Size));
             }
 
-            if ((uint)Index >= 16 >> Size)
+            if ((uint)Index >= 16u >> Size)
             {
                 throw new ArgumentOutOfRangeException(nameof(Index));
             }
@@ -1483,12 +1481,12 @@ namespace ChocolArm64.Instruction
 
         private static void ThrowIfInvalidF(int Index, int Size)
         {
-            if ((uint)Size > 1)
+            if ((uint)Size > 1u)
             {
                 throw new ArgumentOutOfRangeException(nameof(Size));
             }
 
-            if ((uint)Index >= 4 >> Size)
+            if ((uint)Index >= 4u >> Size)
             {
                 throw new ArgumentOutOfRangeException(nameof(Index));
             }
diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs
index 3c5c5c4d9e..b69e2c75e3 100644
--- a/ChocolArm64/Instruction/ASoftFallback.cs
+++ b/ChocolArm64/Instruction/ASoftFallback.cs
@@ -112,13 +112,13 @@ namespace ChocolArm64.Instruction
 
             if (op > TMaxValue)
             {
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 return TMaxValue;
             }
             else if (op < TMinValue)
             {
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 return TMinValue;
             }
@@ -137,13 +137,13 @@ namespace ChocolArm64.Instruction
 
             if (op > (long)TMaxValue)
             {
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 return TMaxValue;
             }
             else if (op < (long)TMinValue)
             {
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 return TMinValue;
             }
@@ -161,7 +161,7 @@ namespace ChocolArm64.Instruction
 
             if (op > (ulong)TMaxValue)
             {
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 return TMaxValue;
             }
@@ -179,7 +179,7 @@ namespace ChocolArm64.Instruction
 
             if (op > TMaxValue)
             {
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 return TMaxValue;
             }
@@ -193,7 +193,7 @@ namespace ChocolArm64.Instruction
         {
             if (op == long.MinValue)
             {
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 return long.MaxValue;
             }
@@ -209,7 +209,7 @@ namespace ChocolArm64.Instruction
 
             if ((~(op1 ^ op2) & (op1 ^ Add)) < 0L)
             {
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 if (op1 < 0L)
                 {
@@ -232,7 +232,7 @@ namespace ChocolArm64.Instruction
 
             if ((Add < op1) && (Add < op2))
             {
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 return ulong.MaxValue;
             }
@@ -248,7 +248,7 @@ namespace ChocolArm64.Instruction
 
             if (((op1 ^ op2) & (op1 ^ Sub)) < 0L)
             {
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 if (op1 < 0L)
                 {
@@ -271,7 +271,7 @@ namespace ChocolArm64.Instruction
 
             if (op1 < op2)
             {
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 return ulong.MinValue;
             }
@@ -292,7 +292,7 @@ namespace ChocolArm64.Instruction
 
                 if ((~op2 & Add) < 0L)
                 {
-                    SetFpsrQCFlag(State);
+                    State.SetFpsrFlag(FPSR.QC);
 
                     return long.MaxValue;
                 }
@@ -306,7 +306,7 @@ namespace ChocolArm64.Instruction
                 // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
                 // op2 from (long)ulong.MinValue to long.MaxValue
 
-                SetFpsrQCFlag(State);
+                State.SetFpsrFlag(FPSR.QC);
 
                 return long.MaxValue;
             }
@@ -319,7 +319,7 @@ namespace ChocolArm64.Instruction
 
                 if (Add > (ulong)long.MaxValue)
                 {
-                    SetFpsrQCFlag(State);
+                    State.SetFpsrFlag(FPSR.QC);
 
                     return long.MaxValue;
                 }
@@ -341,7 +341,7 @@ namespace ChocolArm64.Instruction
 
                 if ((Add < (ulong)op1) && (Add < op2))
                 {
-                    SetFpsrQCFlag(State);
+                    State.SetFpsrFlag(FPSR.QC);
 
                     return ulong.MaxValue;
                 }
@@ -366,7 +366,7 @@ namespace ChocolArm64.Instruction
 
                 if (Add < (long)ulong.MinValue)
                 {
-                    SetFpsrQCFlag(State);
+                    State.SetFpsrFlag(FPSR.QC);
 
                     return ulong.MinValue;
                 }
@@ -376,13 +376,6 @@ namespace ChocolArm64.Instruction
                 }
             }
         }
-
-        private static void SetFpsrQCFlag(AThreadState State)
-        {
-            const int QCFlagBit = 27;
-
-            State.Fpsr |= 1 << QCFlagBit;
-        }
 #endregion
 
 #region "Count"
diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs
index 2d9a9f0ebe..0912257a49 100644
--- a/ChocolArm64/Instruction/ASoftFloat.cs
+++ b/ChocolArm64/Instruction/ASoftFloat.cs
@@ -195,41 +195,535 @@ namespace ChocolArm64.Instruction
             ulong result = x_sign | (result_exp << 52) | fraction;
             return BitConverter.Int64BitsToDouble((long)result);
         }
+    }
 
-        public static float ConvertHalfToSingle(ushort x)
+    static class ASoftFloat16_32
+    {
+        public static float FPConvert(ushort ValueBits, AThreadState State) // Converts half-precision bits to a single-precision float, reading FPCR and updating FPSR through State.
         {
-            uint x_sign = (uint)(x >> 15) & 0x0001;
-            uint x_exp = (uint)(x >> 10) & 0x001F;
-            uint x_mantissa = (uint)x & 0x03FF;
+            Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat16_32.FPConvert: State.Fpcr = 0x{State.Fpcr:X8}"); // Traces the FPCR value whenever it is non-zero.
 
-            if (x_exp == 0 && x_mantissa == 0)
+            double Real = ValueBits.FPUnpackCV(out FPType Type, out bool Sign, State); // Classify the input and obtain its value as a double.
+
+            float Result;
+
+            if (Type == FPType.SNaN || Type == FPType.QNaN)
             {
-                // Zero
-                return BitConverter.Int32BitsToSingle((int)(x_sign << 31));
-            }
-
-            if (x_exp == 0x1F)
-            {
-                // NaN or Infinity
-                return BitConverter.Int32BitsToSingle((int)((x_sign << 31) | 0x7F800000 | (x_mantissa << 13)));
-            }
-
-            int exponent = (int)x_exp - 15;
-
-            if (x_exp == 0)
-            {
-                // Denormal
-                x_mantissa <<= 1;
-                while ((x_mantissa & 0x0400) == 0)
+                if (State.GetFpcrFlag(FPCR.DN)) // FPCR.DN set: the NaN payload is discarded in favour of the default NaN.
                 {
-                    x_mantissa <<= 1;
-                    exponent--;
+                    Result = FPDefaultNaN();
                 }
-                x_mantissa &= 0x03FF;
+                else
+                {
+                    Result = FPConvertNaN(ValueBits); // Keep the sign and payload bits of the input NaN.
+                }
+
+                if (Type == FPType.SNaN) // Only a signaling NaN raises Invalid Operation.
+                {
+                    FPProcessException(FPExc.InvalidOp, State);
+                }
+            }
+            else if (Type == FPType.Infinity)
+            {
+                Result = FPInfinity(Sign);
+            }
+            else if (Type == FPType.Zero)
+            {
+                Result = FPZero(Sign);
+            }
+            else
+            {
+                Result = FPRoundCV(Real, State); // Non-zero finite value: round it to single precision.
             }
 
-            uint new_exp = (uint)((exponent + 127) & 0xFF) << 23;
-            return BitConverter.Int32BitsToSingle((int)((x_sign << 31) | new_exp | (x_mantissa << 13)));
+            return Result;
+        }
+
+        private static float FPDefaultNaN() // NaN value that FPConvert returns when FPCR.DN is set.
+        {
+            return -float.NaN; // NOTE(review): presumably negated so the sign bit ends up clear (float.NaN is sign-set on .NET) - confirm.
+        }
+
+        private static float FPInfinity(bool Sign) // Single-precision infinity with the requested sign.
+        {
+            return Sign ? float.NegativeInfinity : float.PositiveInfinity; // Sign == true selects the negative infinity.
+        }
+
+        private static float FPZero(bool Sign) // Single-precision zero with the requested sign.
+        {
+            return Sign ? -0f : +0f; // Sign == true selects negative zero.
+        }
+
+        private static float FPMaxNormal(bool Sign) // Largest-magnitude finite float with the requested sign.
+        {
+            return Sign ? float.MinValue : float.MaxValue; // float.MinValue is the most negative finite float.
+        }
+
+        private static double FPUnpackCV(this ushort ValueBits, out FPType Type, out bool Sign, AThreadState State) // Decodes half-precision bits into a type class, a sign flag and a signed double value.
+        {
+            Sign = (~(uint)ValueBits & 0x8000u) == 0u; // True when bit 15 (the sign bit) is set.
+
+            uint Exp16  = ((uint)ValueBits & 0x7C00u) >> 10; // 5-bit exponent field, bits 14:10.
+            uint Frac16 =  (uint)ValueBits & 0x03FFu; // 10-bit fraction field, bits 9:0.
+
+            double Real;
+
+            if (Exp16 == 0u)
+            {
+                if (Frac16 == 0u)
+                {
+                    Type = FPType.Zero;
+                    Real = 0d;
+                }
+                else
+                {
+                    Type = FPType.Nonzero; // Subnormal: no implicit leading 1.
+                    Real = Math.Pow(2d, -14) * ((double)Frac16 * Math.Pow(2d, -10)); // 2^-14 * (Frac16 * 2^-10).
+                }
+            }
+            else if (Exp16 == 0x1Fu && !State.GetFpcrFlag(FPCR.AHP)) // With FPCR.AHP set, an all-ones exponent falls through to the normal-number branch.
+            {
+                if (Frac16 == 0u)
+                {
+                    Type = FPType.Infinity;
+                    Real = Math.Pow(2d, 1000); // Large stand-in magnitude; FPConvert handles Infinity through Type, not Real.
+                }
+                else
+                {
+                    Type = (~Frac16 & 0x0200u) == 0u ? FPType.QNaN : FPType.SNaN; // Fraction bit 9 set => quiet NaN, clear => signaling NaN.
+                    Real = 0d;
+                }
+            }
+            else
+            {
+                Type = FPType.Nonzero; // Normal: implicit leading 1.
+                Real = Math.Pow(2d, (int)Exp16 - 15) * (1d + (double)Frac16 * Math.Pow(2d, -10)); // 2^(Exp16 - 15) * (1 + Frac16 * 2^-10).
+            }
+
+            return Sign ? -Real : Real; // Apply the sign to the magnitude.
+        }
+
+        private static float FPRoundCV(double Real, AThreadState State) // Rounds a non-zero double to single precision using the rounding mode from State, raising FP exceptions as needed.
+        {
+            const int MinimumExp = -126; // Exponents below this take the subnormal / flush-to-zero paths below.
+
+            const int E = 8; // Exponent field width in bits (see the 2^E - 1 overflow check below).
+            const int F = 23; // Fraction field width in bits (see the 2^F scaling below).
+
+            bool   Sign;
+            double Mantissa;
+
+            if (Real < 0d) // Split the value into a sign flag and a non-negative magnitude.
+            {
+                Sign     = true;
+                Mantissa = -Real;
+            }
+            else
+            {
+                Sign     = false;
+                Mantissa = Real;
+            }
+
+            int Exponent = 0;
+
+            while (Mantissa < 1d) // Scale up into [1, 2); this loop would never end for Mantissa == 0.
+            {
+                Mantissa *= 2d;
+                Exponent--;
+            }
+
+            while (Mantissa >= 2d) // Scale down into [1, 2).
+            {
+                Mantissa /= 2d;
+                Exponent++;
+            }
+
+            if (State.GetFpcrFlag(FPCR.FZ) && Exponent < MinimumExp) // Flush-to-zero: set FPSR.UFC and return a signed zero.
+            {
+                State.SetFpsrFlag(FPSR.UFC);
+
+                return FPZero(Sign);
+            }
+
+            uint BiasedExp = (uint)Math.Max(Exponent - MinimumExp + 1, 0); // Clamped to 0 when Exponent is below MinimumExp.
+
+            if (BiasedExp == 0u)
+            {
+                Mantissa /= Math.Pow(2d, MinimumExp - Exponent); // Shift the mantissa down by the exponent shortfall.
+            }
+
+            uint IntMant = (uint)Math.Floor(Mantissa * Math.Pow(2d, F)); // Mantissa scaled by 2^F and truncated to an integer.
+            double Error = Mantissa * Math.Pow(2d, F) - (double)IntMant; // Fraction discarded by the truncation, in [0, 1).
+
+            if (BiasedExp == 0u && (Error != 0d || State.GetFpcrFlag(FPCR.UFE))) // Underflow: zero biased exponent and either inexact or FPCR.UFE set.
+            {
+                FPProcessException(FPExc.Underflow, State);
+            }
+
+            bool OverflowToInf;
+            bool RoundUp;
+
+            switch (State.FPRoundingMode())
+            {
+                default:
+                case ARoundMode.ToNearest:
+                    RoundUp       = (Error > 0.5d || (Error == 0.5d && (IntMant & 1u) == 1u)); // Ties go to the even mantissa.
+                    OverflowToInf = true;
+                    break;
+
+                case ARoundMode.TowardsPlusInfinity:
+                    RoundUp       = (Error != 0d && !Sign); // Only positive inexact values grow in magnitude.
+                    OverflowToInf = !Sign;
+                    break;
+
+                case ARoundMode.TowardsMinusInfinity:
+                    RoundUp       = (Error != 0d && Sign); // Only negative inexact values grow in magnitude.
+                    OverflowToInf = Sign;
+                    break;
+
+                case ARoundMode.TowardsZero:
+                    RoundUp       = false; // Truncation already rounds towards zero.
+                    OverflowToInf = false;
+                    break;
+            }
+
+            if (RoundUp)
+            {
+                IntMant++;
+
+                if (IntMant == (uint)Math.Pow(2d, F)) // Mantissa reached exactly 2^F after the increment.
+                {
+                    BiasedExp = 1u;
+                }
+
+                if (IntMant == (uint)Math.Pow(2d, F + 1)) // Mantissa overflowed to 2^(F+1): halve it and bump the exponent.
+                {
+                    BiasedExp++;
+                    IntMant >>= 1;
+                }
+            }
+
+            float Result;
+
+            if (BiasedExp >= (uint)Math.Pow(2d, E) - 1u) // Exponent reached the all-ones encoding: overflow.
+            {
+                Result = OverflowToInf ? FPInfinity(Sign) : FPMaxNormal(Sign);
+
+                FPProcessException(FPExc.Overflow, State);
+
+                Error = 1d; // Makes the Inexact check below fire as well.
+            }
+            else
+            {
+                Result = BitConverter.Int32BitsToSingle( // Pack sign (bit 31), exponent (bits 30:23) and fraction (bits 22:0).
+                    (int)((Sign ? 1u : 0u) << 31 | (BiasedExp & 0xFFu) << 23 | (IntMant & 0x007FFFFFu)));
+            }
+
+            if (Error != 0d) // Any discarded precision (or an overflow) raises Inexact.
+            {
+                FPProcessException(FPExc.Inexact, State);
+            }
+
+            return Result;
+        }
+
+        private static float FPConvertNaN(ushort ValueBits) // Builds a single-precision NaN from the sign and low 9 fraction bits of a half-precision value.
+        {
+            return BitConverter.Int32BitsToSingle( // Sign bit 15 -> bit 31; 0x7FC00000 sets the exponent and top fraction bit; bits 8:0 -> bits 21:13.
+                (int)(((uint)ValueBits & 0x8000u) << 16 | 0x7FC00000u | ((uint)ValueBits & 0x01FFu) << 13));
+        }
+
+        private static void FPProcessException(FPExc Exc, AThreadState State) // Sets the FPSR bit for Exc, or throws when the matching FPCR enable bit is set.
+        {
+            int Enable = (int)Exc + 8; // The FPCR bit tested for Exc sits 8 positions above the FPSR bit.
+
+            if ((State.Fpcr & (1 << Enable)) != 0)
+            {
+                throw new NotImplementedException("floating-point trap handling"); // This case is not implemented.
+            }
+            else
+            {
+                State.Fpsr |= 1 << (int)Exc; // Set the FPSR bit numbered by Exc.
+            }
+        }
+    }
+
+    static class ASoftFloat32_16
+    {
+        public static ushort FPConvert(float Value, AThreadState State) // Converts a single-precision float to half-precision bits, reading FPCR and updating FPSR through State.
+        {
+            Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat32_16.FPConvert: State.Fpcr = 0x{State.Fpcr:X8}"); // Traces the FPCR value whenever it is non-zero.
+
+            double Real = Value.FPUnpackCV(out FPType Type, out bool Sign, State, out uint ValueBits); // Classify the input; ValueBits receives its raw 32-bit pattern.
+
+            bool AltHp = State.GetFpcrFlag(FPCR.AHP); // FPCR.AHP changes how NaN and infinity inputs are handled below.
+
+            ushort ResultBits;
+
+            if (Type == FPType.SNaN || Type == FPType.QNaN)
+            {
+                if (AltHp)
+                {
+                    ResultBits = FPZero(Sign); // With AHP set, a NaN input yields a signed zero.
+                }
+                else if (State.GetFpcrFlag(FPCR.DN))
+                {
+                    ResultBits = FPDefaultNaN(); // FPCR.DN set: discard the payload.
+                }
+                else
+                {
+                    ResultBits = FPConvertNaN(ValueBits); // Keep the sign and top payload bits.
+                }
+
+                if (Type == FPType.SNaN || AltHp) // Signaling NaNs, and any NaN when AHP is set, raise Invalid Operation.
+                {
+                    FPProcessException(FPExc.InvalidOp, State);
+                }
+            }
+            else if (Type == FPType.Infinity)
+            {
+                if (AltHp)
+                {
+                    ResultBits = (ushort)((Sign ? 1u : 0u) << 15 | 0x7FFFu); // Sign bit plus all-ones exponent and fraction.
+
+                    FPProcessException(FPExc.InvalidOp, State);
+                }
+                else
+                {
+                    ResultBits = FPInfinity(Sign);
+                }
+            }
+            else if (Type == FPType.Zero)
+            {
+                ResultBits = FPZero(Sign);
+            }
+            else
+            {
+                ResultBits = FPRoundCV(Real, State); // Non-zero finite value: round it to half precision.
+            }
+
+            return ResultBits;
+        }
+
+        private static ushort FPDefaultNaN() // Half-precision bits that FPConvert returns for a NaN when FPCR.DN is set and FPCR.AHP is clear.
+        {
+            return (ushort)0x7E00u; // Sign clear, exponent bits 14:10 all ones, fraction bit 9 set.
+        }
+
+        private static ushort FPInfinity(bool Sign) // Half-precision infinity bits with the requested sign.
+        {
+            return Sign ? (ushort)0xFC00u : (ushort)0x7C00u; // All-ones exponent, zero fraction; bit 15 carries the sign.
+        }
+
+        private static ushort FPZero(bool Sign) // Half-precision zero bits with the requested sign.
+        {
+            return Sign ? (ushort)0x8000u : (ushort)0x0000u; // Only bit 15 (the sign) may be set.
+        }
+
+        private static ushort FPMaxNormal(bool Sign) // Half-precision bits returned on overflow when the rounding mode does not overflow to infinity.
+        {
+            return Sign ? (ushort)0xFBFFu : (ushort)0x7BFFu; // Exponent field 0x1E with an all-ones fraction; bit 15 carries the sign.
+        }
+
+        private static double FPUnpackCV(this float Value, out FPType Type, out bool Sign, AThreadState State, out uint ValueBits) // Decodes a float into a type class, a sign flag, its raw bits and a signed double value.
+        {
+            ValueBits = (uint)BitConverter.SingleToInt32Bits(Value); // Raw 32-bit pattern, also handed back to the caller.
+
+            Sign = (~ValueBits & 0x80000000u) == 0u; // True when bit 31 (the sign bit) is set.
+
+            uint Exp32  = (ValueBits & 0x7F800000u) >> 23; // 8-bit exponent field, bits 30:23.
+            uint Frac32 =  ValueBits & 0x007FFFFFu; // 23-bit fraction field, bits 22:0.
+
+            double Real;
+
+            if (Exp32 == 0u)
+            {
+                if (Frac32 == 0u || State.GetFpcrFlag(FPCR.FZ)) // With FPCR.FZ set, subnormal inputs are treated as zero.
+                {
+                    Type = FPType.Zero;
+                    Real = 0d;
+
+                    if (Frac32 != 0u) FPProcessException(FPExc.InputDenorm, State); // A flushed subnormal raises Input Denormal.
+                }
+                else
+                {
+                    Type = FPType.Nonzero; // Subnormal: no implicit leading 1.
+                    Real = Math.Pow(2d, -126) * ((double)Frac32 * Math.Pow(2d, -23)); // 2^-126 * (Frac32 * 2^-23).
+                }
+            }
+            else if (Exp32 == 0xFFu) // All-ones exponent: infinity or NaN.
+            {
+                if (Frac32 == 0u)
+                {
+                    Type = FPType.Infinity;
+                    Real = Math.Pow(2d, 1000); // Large stand-in magnitude; FPConvert handles Infinity through Type, not Real.
+                }
+                else
+                {
+                    Type = (~Frac32 & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN; // Fraction bit 22 set => quiet NaN, clear => signaling NaN.
+                    Real = 0d;
+                }
+            }
+            else
+            {
+                Type = FPType.Nonzero; // Normal: implicit leading 1.
+                Real = Math.Pow(2d, (int)Exp32 - 127) * (1d + (double)Frac32 * Math.Pow(2d, -23)); // 2^(Exp32 - 127) * (1 + Frac32 * 2^-23).
+            }
+
+            return Sign ? -Real : Real; // Apply the sign to the magnitude.
+        }
+
+        private static ushort FPRoundCV(double Real, AThreadState State) // Rounds a non-zero double to half-precision bits using the rounding mode from State, raising FP exceptions as needed.
+        {
+            const int MinimumExp = -14; // Exponents below this take the subnormal path below.
+
+            const int E = 5; // Exponent field width in bits (see the 2^E checks below).
+            const int F = 10; // Fraction field width in bits (see the 2^F scaling below).
+
+            bool   Sign;
+            double Mantissa;
+
+            if (Real < 0d) // Split the value into a sign flag and a non-negative magnitude.
+            {
+                Sign     = true;
+                Mantissa = -Real;
+            }
+            else
+            {
+                Sign     = false;
+                Mantissa = Real;
+            }
+
+            int Exponent = 0;
+
+            while (Mantissa < 1d) // Scale up into [1, 2); this loop would never end for Mantissa == 0.
+            {
+                Mantissa *= 2d;
+                Exponent--;
+            }
+
+            while (Mantissa >= 2d) // Scale down into [1, 2).
+            {
+                Mantissa /= 2d;
+                Exponent++;
+            }
+
+            uint BiasedExp = (uint)Math.Max(Exponent - MinimumExp + 1, 0); // Clamped to 0 when Exponent is below MinimumExp.
+
+            if (BiasedExp == 0u)
+            {
+                Mantissa /= Math.Pow(2d, MinimumExp - Exponent); // Shift the mantissa down by the exponent shortfall.
+            }
+
+            uint IntMant = (uint)Math.Floor(Mantissa * Math.Pow(2d, F)); // Mantissa scaled by 2^F and truncated to an integer.
+            double Error = Mantissa * Math.Pow(2d, F) - (double)IntMant; // Fraction discarded by the truncation, in [0, 1).
+
+            if (BiasedExp == 0u && (Error != 0d || State.GetFpcrFlag(FPCR.UFE))) // Underflow: zero biased exponent and either inexact or FPCR.UFE set.
+            {
+                FPProcessException(FPExc.Underflow, State);
+            }
+
+            bool OverflowToInf;
+            bool RoundUp;
+
+            switch (State.FPRoundingMode())
+            {
+                default:
+                case ARoundMode.ToNearest:
+                    RoundUp       = (Error > 0.5d || (Error == 0.5d && (IntMant & 1u) == 1u)); // Ties go to the even mantissa.
+                    OverflowToInf = true;
+                    break;
+
+                case ARoundMode.TowardsPlusInfinity:
+                    RoundUp       = (Error != 0d && !Sign); // Only positive inexact values grow in magnitude.
+                    OverflowToInf = !Sign;
+                    break;
+
+                case ARoundMode.TowardsMinusInfinity:
+                    RoundUp       = (Error != 0d && Sign); // Only negative inexact values grow in magnitude.
+                    OverflowToInf = Sign;
+                    break;
+
+                case ARoundMode.TowardsZero:
+                    RoundUp       = false; // Truncation already rounds towards zero.
+                    OverflowToInf = false;
+                    break;
+            }
+
+            if (RoundUp)
+            {
+                IntMant++;
+
+                if (IntMant == (uint)Math.Pow(2d, F)) // Mantissa reached exactly 2^F after the increment.
+                {
+                    BiasedExp = 1u;
+                }
+
+                if (IntMant == (uint)Math.Pow(2d, F + 1)) // Mantissa overflowed to 2^(F+1): halve it and bump the exponent.
+                {
+                    BiasedExp++;
+                    IntMant >>= 1;
+                }
+            }
+
+            ushort ResultBits;
+
+            if (!State.GetFpcrFlag(FPCR.AHP)) // FPCR.AHP clear: the all-ones exponent is not usable for finite results.
+            {
+                if (BiasedExp >= (uint)Math.Pow(2d, E) - 1u) // Exponent reached the all-ones encoding: overflow.
+                {
+                    ResultBits = OverflowToInf ? FPInfinity(Sign) : FPMaxNormal(Sign);
+
+                    FPProcessException(FPExc.Overflow, State);
+
+                    Error = 1d; // Makes the Inexact check below fire as well.
+                }
+                else
+                {
+                    ResultBits = (ushort)((Sign ? 1u : 0u) << 15 | (BiasedExp & 0x1Fu) << 10 | (IntMant & 0x03FFu)); // Pack sign (bit 15), exponent (bits 14:10) and fraction (bits 9:0).
+                }
+            }
+            else
+            {
+                if (BiasedExp >= (uint)Math.Pow(2d, E)) // FPCR.AHP set: only exponents of 2^E or more are out of range.
+                {
+                    ResultBits = (ushort)((Sign ? 1u : 0u) << 15 | 0x7FFFu); // Sign bit plus all-ones exponent and fraction.
+
+                    FPProcessException(FPExc.InvalidOp, State);
+
+                    Error = 0d; // Suppresses the Inexact check below.
+                }
+                else
+                {
+                    ResultBits = (ushort)((Sign ? 1u : 0u) << 15 | (BiasedExp & 0x1Fu) << 10 | (IntMant & 0x03FFu)); // Pack sign (bit 15), exponent (bits 14:10) and fraction (bits 9:0).
+                }
+            }
+
+            if (Error != 0d) // Any remaining non-zero Error raises Inexact.
+            {
+                FPProcessException(FPExc.Inexact, State);
+            }
+
+            return ResultBits;
+        }
+
+        private static ushort FPConvertNaN(uint ValueBits) // Builds half-precision NaN bits from the sign and fraction bits 21:13 of a single-precision value.
+        {
+            return (ushort)((ValueBits & 0x80000000u) >> 16 | 0x7E00u | (ValueBits & 0x003FE000u) >> 13); // Sign bit 31 -> bit 15; 0x7E00 sets the exponent and top fraction bit; bits 21:13 -> bits 8:0.
+        }
+
+        private static void FPProcessException(FPExc Exc, AThreadState State) // Sets the FPSR bit for Exc, or throws when the matching FPCR enable bit is set.
+        {
+            int Enable = (int)Exc + 8; // The FPCR bit tested for Exc sits 8 positions above the FPSR bit.
+
+            if ((State.Fpcr & (1 << Enable)) != 0)
+            {
+                throw new NotImplementedException("floating-point trap handling"); // This case is not implemented.
+            }
+            else
+            {
+                State.Fpsr |= 1 << (int)Exc; // Set the FPSR bit numbered by Exc.
+            }
         }
     }
 
@@ -756,56 +1250,31 @@ namespace ChocolArm64.Instruction
             return Result;
         }
 
-        private enum FPType
-        {
-            Nonzero,
-            Zero,
-            Infinity,
-            QNaN,
-            SNaN
-        }
-
-        private enum FPExc
-        {
-            InvalidOp,
-            DivideByZero,
-            Overflow,
-            Underflow,
-            Inexact,
-            InputDenorm = 7
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static float FPDefaultNaN()
         {
             return -float.NaN;
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static float FPInfinity(bool Sign)
         {
             return Sign ? float.NegativeInfinity : float.PositiveInfinity;
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static float FPZero(bool Sign)
         {
             return Sign ? -0f : +0f;
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static float FPTwo(bool Sign)
         {
             return Sign ? -2f : +2f;
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static float FPOnePointFive(bool Sign)
         {
             return Sign ? -1.5f : +1.5f;
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static float FPNeg(this float Value)
         {
             return -Value;
@@ -927,8 +1396,6 @@ namespace ChocolArm64.Instruction
 
         private static float FPProcessNaN(FPType Type, uint Op, AThreadState State)
         {
-            const int DNBit = 25; // Default NaN mode control bit.
-
             if (Type == FPType.SNaN)
             {
                 Op |= 1u << 22;
@@ -936,7 +1403,7 @@ namespace ChocolArm64.Instruction
                 FPProcessException(FPExc.InvalidOp, State);
             }
 
-            if ((State.Fpcr & (1 << DNBit)) != 0)
+            if (State.GetFpcrFlag(FPCR.DN))
             {
                 return FPDefaultNaN();
             }
@@ -1482,56 +1949,31 @@ namespace ChocolArm64.Instruction
             return Result;
         }
 
-        private enum FPType
-        {
-            Nonzero,
-            Zero,
-            Infinity,
-            QNaN,
-            SNaN
-        }
-
-        private enum FPExc
-        {
-            InvalidOp,
-            DivideByZero,
-            Overflow,
-            Underflow,
-            Inexact,
-            InputDenorm = 7
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static double FPDefaultNaN()
         {
             return -double.NaN;
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static double FPInfinity(bool Sign)
         {
             return Sign ? double.NegativeInfinity : double.PositiveInfinity;
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static double FPZero(bool Sign)
         {
             return Sign ? -0d : +0d;
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static double FPTwo(bool Sign)
         {
             return Sign ? -2d : +2d;
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static double FPOnePointFive(bool Sign)
         {
             return Sign ? -1.5d : +1.5d;
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static double FPNeg(this double Value)
         {
             return -Value;
@@ -1653,8 +2095,6 @@ namespace ChocolArm64.Instruction
 
         private static double FPProcessNaN(FPType Type, ulong Op, AThreadState State)
         {
-            const int DNBit = 25; // Default NaN mode control bit.
-
             if (Type == FPType.SNaN)
             {
                 Op |= 1ul << 51;
@@ -1662,7 +2102,7 @@ namespace ChocolArm64.Instruction
                 FPProcessException(FPExc.InvalidOp, State);
             }
 
-            if ((State.Fpcr & (1 << DNBit)) != 0)
+            if (State.GetFpcrFlag(FPCR.DN))
             {
                 return FPDefaultNaN();
             }
diff --git a/ChocolArm64/Instruction/AVectorHelper.cs b/ChocolArm64/Instruction/AVectorHelper.cs
index 7f9d98cd83..41e865b94a 100644
--- a/ChocolArm64/Instruction/AVectorHelper.cs
+++ b/ChocolArm64/Instruction/AVectorHelper.cs
@@ -105,9 +105,9 @@ namespace ChocolArm64.Instruction
                    Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
         }
 
-        public static double Round(double Value, int Fpcr)
+        public static double Round(double Value, AThreadState State)
         {
-            switch ((ARoundMode)((Fpcr >> 22) & 3))
+            switch (State.FPRoundingMode())
             {
                 case ARoundMode.ToNearest:            return Math.Round   (Value);
                 case ARoundMode.TowardsPlusInfinity:  return Math.Ceiling (Value);
@@ -118,9 +118,9 @@ namespace ChocolArm64.Instruction
             throw new InvalidOperationException();
         }
 
-        public static float RoundF(float Value, int Fpcr)
+        public static float RoundF(float Value, AThreadState State)
         {
-            switch ((ARoundMode)((Fpcr >> 22) & 3))
+            switch (State.FPRoundingMode())
             {
                 case ARoundMode.ToNearest:            return MathF.Round   (Value);
                 case ARoundMode.TowardsPlusInfinity:  return MathF.Ceiling (Value);
diff --git a/ChocolArm64/State/APState.cs b/ChocolArm64/State/APState.cs
index f55431a661..aaf0ff0ce1 100644
--- a/ChocolArm64/State/APState.cs
+++ b/ChocolArm64/State/APState.cs
@@ -3,7 +3,7 @@ using System;
 namespace ChocolArm64.State
 {
     [Flags]
-    public enum APState
+    enum APState
     {
         VBit = 28,
         CBit = 29,
@@ -20,4 +20,4 @@ namespace ChocolArm64.State
 
         NZCV = NZ | CV
     }
-}
\ No newline at end of file
+}
diff --git a/ChocolArm64/State/ARoundMode.cs b/ChocolArm64/State/ARoundMode.cs
index 9896f3075e..297d0137cb 100644
--- a/ChocolArm64/State/ARoundMode.cs
+++ b/ChocolArm64/State/ARoundMode.cs
@@ -1,10 +1,10 @@
 namespace ChocolArm64.State
 {
-    public enum ARoundMode
+    enum ARoundMode
     {
         ToNearest            = 0,
         TowardsPlusInfinity  = 1,
         TowardsMinusInfinity = 2,
         TowardsZero          = 3
     }
-}
\ No newline at end of file
+}
diff --git a/ChocolArm64/State/AThreadState.cs b/ChocolArm64/State/AThreadState.cs
index e4953b021c..fbfac5bcac 100644
--- a/ChocolArm64/State/AThreadState.cs
+++ b/ChocolArm64/State/AThreadState.cs
@@ -145,5 +145,20 @@ namespace ChocolArm64.State
         {
             Undefined?.Invoke(this, new AInstUndefinedEventArgs(Position, RawOpCode));
         }
+
+        internal bool GetFpcrFlag(FPCR Flag) // True when bit number Flag of Fpcr is set.
+        {
+            return ((Fpcr >> (int)Flag) & 1) != 0;
+        }
+
+        internal void SetFpsrFlag(FPSR Flag) // Sets bit number Flag of Fpsr and leaves every other bit untouched.
+        {
+            Fpsr = Fpsr | (1 << (int)Flag);
+        }
+
+        internal ARoundMode FPRoundingMode() // Decodes the two-bit field that starts at bit FPCR.RMode of Fpcr.
+        {
+            return (ARoundMode)((Fpcr & (3 << (int)FPCR.RMode)) >> (int)FPCR.RMode);
+        }
     }
-}
\ No newline at end of file
+}
diff --git a/ChocolArm64/State/FPCR.cs b/ChocolArm64/State/FPCR.cs
new file mode 100644
index 0000000000..8f47cf9087
--- /dev/null
+++ b/ChocolArm64/State/FPCR.cs
@@ -0,0 +1,11 @@
+namespace ChocolArm64.State
+{
+    enum FPCR // Bit positions inside AThreadState.Fpcr; the values are used as shift counts.
+    {
+        UFE   = 11, // Equals FPExc.Underflow (3) + 8, i.e. the bit FPProcessException tests for an underflow.
+        RMode = 22, // Low bit of the two-bit rounding mode field.
+        FZ    = 24, // Flush-to-zero mode control bit.
+        DN    = 25, // Default NaN mode control bit.
+        AHP   = 26  // Alternative half-precision control bit.
+    }
+}
diff --git a/ChocolArm64/State/FPExc.cs b/ChocolArm64/State/FPExc.cs
new file mode 100644
index 0000000000..a665957d6d
--- /dev/null
+++ b/ChocolArm64/State/FPExc.cs
@@ -0,0 +1,12 @@
+namespace ChocolArm64.State
+{
+    enum FPExc // Floating-point exception kinds; each value is the bit index of its flag in Fpsr.
+    {
+        InvalidOp    = 0,
+        DivideByZero = 1,
+        Overflow     = 2,
+        Underflow    = 3,
+        Inexact      = 4,
+        InputDenorm  = 7 // Not contiguous with the others: indices 5 and 6 are not assigned here.
+    }
+}
diff --git a/ChocolArm64/State/FPSR.cs b/ChocolArm64/State/FPSR.cs
new file mode 100644
index 0000000000..d71cde7852
--- /dev/null
+++ b/ChocolArm64/State/FPSR.cs
@@ -0,0 +1,8 @@
+namespace ChocolArm64.State
+{
+    enum FPSR // Bit positions inside AThreadState.Fpsr; the values are used as shift counts.
+    {
+        UFC = 3, // Same index as FPExc.Underflow.
+        QC  = 27 // NOTE(review): presumably the cumulative saturation flag - confirm against the architecture manual.
+    }
+}
diff --git a/ChocolArm64/State/FPType.cs b/ChocolArm64/State/FPType.cs
new file mode 100644
index 0000000000..b00f5fee0d
--- /dev/null
+++ b/ChocolArm64/State/FPType.cs
@@ -0,0 +1,11 @@
+namespace ChocolArm64.State
+{
+    enum FPType // Classification of a floating-point operand.
+    {
+        Nonzero,
+        Zero,
+        Infinity,
+        QNaN, // Quiet NaN.
+        SNaN  // Signaling NaN; FPProcessNaN raises FPExc.InvalidOp when it sees one.
+    }
+}
diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs
index 24585fe78f..4587189b90 100644
--- a/Ryujinx.Tests/Cpu/CpuTest.cs
+++ b/Ryujinx.Tests/Cpu/CpuTest.cs
@@ -178,11 +178,30 @@ namespace Ryujinx.Tests.Cpu
             return GetThreadState();
         }
 
+        /// <summary>Values of the Rounding Mode control field (shifted into place with FPCR.RMode).</summary>
+        public enum RMode
+        {
+            /// <summary>Round to Nearest (RN) mode.</summary>
+            RN = 0,
+            /// <summary>Round towards Plus Infinity (RP) mode.</summary>
+            RP = 1,
+            /// <summary>Round towards Minus Infinity (RM) mode.</summary>
+            RM = 2,
+            /// <summary>Round towards Zero (RZ) mode.</summary>
+            RZ = 3
+        }
+
         /// <summary>Floating-point Control Register.</summary>
         protected enum FPCR
         {
+            /// <summary>Rounding Mode control field.</summary>
+            RMode = 22,
+            /// <summary>Flush-to-zero mode control bit.</summary>
+            FZ    = 24,
             /// <summary>Default NaN mode control bit.</summary>
-            DN = 25
+            DN    = 25,
+            /// <summary>Alternative half-precision control bit.</summary>
+            AHP   = 26
         }
 
         /// <summary>Floating-point Status Register.</summary>
@@ -514,6 +533,27 @@ namespace Ryujinx.Tests.Cpu
             return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), (byte)1);
         }
 
+        protected static ushort GenNormal_H() // Random 16-bit pattern whose exponent field is neither all zeros nor all ones.
+        {
+            const uint ExpMask = 0x7C00u; // Bits 14..10.
+            uint Bits, Exp;
+
+            do { Bits = TestContext.CurrentContext.Random.NextUShort(); Exp = Bits & ExpMask; }
+            while (Exp == 0u || Exp == ExpMask); // Retry until the exponent field is in the normal range.
+
+            return (ushort)Bits;
+        }
+
+        protected static ushort GenSubnormal_H() // Random 16-bit pattern with a zero exponent field and a non-zero fraction.
+        {
+            uint Bits, Frac;
+
+            do { Bits = TestContext.CurrentContext.Random.NextUShort(); Frac = Bits & 0x03FFu; }
+            while (Frac == 0u); // Retry until at least one of bits 9..0 is set.
+
+            return (ushort)(Bits & 0x83FFu); // Keep only bit 15 and bits 9..0.
+        }
+
         protected static uint GenNormal_S()
         {
             uint Rnd;
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
index 279f9f0c3e..795d649abb 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@@ -79,6 +79,47 @@ namespace Ryujinx.Tests.Cpu
                                  0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul };
         }
 
+        private static IEnumerable<ulong> _4H_F_() // Half-precision test inputs, each pattern replicated into all four 16-bit lanes.
+        {
+            yield return 0xFBFFFBFFFBFFFBFFul; // -Max Normal
+            yield return 0x8400840084008400ul; // -Min Normal
+            yield return 0x83FF83FF83FF83FFul; // -Max Subnormal
+            yield return 0x8001800180018001ul; // -Min Subnormal
+            yield return 0x7BFF7BFF7BFF7BFFul; // +Max Normal
+            yield return 0x0400040004000400ul; // +Min Normal
+            yield return 0x03FF03FF03FF03FFul; // +Max Subnormal
+            yield return 0x0001000100010001ul; // +Min Subnormal
+
+            if (!NoZeros)
+            {
+                yield return 0x8000800080008000ul; // -Zero
+                yield return 0x0000000000000000ul; // +Zero
+            }
+
+            if (!NoInfs)
+            {
+                yield return 0xFC00FC00FC00FC00ul; // -Infinity
+                yield return 0x7C007C007C007C00ul; // +Infinity
+            }
+
+            if (!NoNaNs)
+            {
+                yield return 0xFE00FE00FE00FE00ul; // -QNaN (all zeros payload)
+                yield return 0xFDFFFDFFFDFFFDFFul; // -SNaN (all ones  payload)
+                yield return 0x7E007E007E007E00ul; // +QNaN (all zeros payload) (DefaultNaN)
+                yield return 0x7DFF7DFF7DFF7DFFul; // +SNaN (all ones  payload)
+            }
+
+            for (int Cnt = 1; Cnt <= RndCnt; Cnt++)
+            {
+                ulong Rnd1 = GenNormal_H();    // Must be ulong: on a uint the shift count is masked to 5 bits,
+                ulong Rnd2 = GenSubnormal_H(); // so "<< 48" and "<< 32" would leave the two upper lanes zero.
+
+                yield return (Rnd1 << 48) | (Rnd1 << 32) | (Rnd1 << 16) | Rnd1;
+                yield return (Rnd2 << 48) | (Rnd2 << 32) | (Rnd2 << 16) | Rnd2;
+            }
+        }
+
         private static IEnumerable<ulong> _1S_F_()
         {
             yield return 0x00000000FF7FFFFFul; // -Max Normal    (float.MinValue)
@@ -265,6 +306,38 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
+        private static uint[] _F_Cvtl_V_4H4S_8H4S_()
+        {
+            // FCVTL V0.4S, V0.4H
+            const uint Fcvtl = 0x0E217800u;
+
+            return new[] { Fcvtl };
+        }
+
+        private static uint[] _F_Cvtl_V_2S2D_4S2D_()
+        {
+            // FCVTL V0.2D, V0.2S
+            const uint Fcvtl = 0x0E617800u;
+
+            return new[] { Fcvtl };
+        }
+
+        private static uint[] _F_Cvtn_V_4S4H_4S8H_()
+        {
+            // FCVTN V0.4H, V0.4S
+            const uint Fcvtn = 0x0E216800u;
+
+            return new[] { Fcvtn };
+        }
+
+        private static uint[] _F_Cvtn_V_2D2S_2D4S_()
+        {
+            // FCVTN V0.2S, V0.2D
+            const uint Fcvtn = 0x0E616800u;
+
+            return new[] { Fcvtn };
+        }
+
         private static uint[] _F_Recpx_Sqrt_S_S_()
         {
             return new uint[]
@@ -889,6 +962,100 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise] [Explicit] // Runs FCVTL (half -> single) on one opcode and compares the result against Unicorn.
+        public void F_Cvtl_V_4H4S_8H4S([ValueSource("_F_Cvtl_V_4H4S_8H4S_")] uint Opcodes,
+                                       [Values(0u)]     uint Rd,
+                                       [Values(1u, 0u)] uint Rn,
+                                       [ValueSource("_4H_F_")] ulong Z,
+                                       [ValueSource("_4H_F_")] ulong A,
+                                       [Values(0b0u, 0b1u)] uint Q, // <4H, 8H>
+                                       [Values(RMode.RN)] RMode RMode)
+        {
+            Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn goes into bits 9..5, Rd into bits 4..0.
+            Opcodes |= ((Q & 1) << 30); // Q goes into bit 30.
+
+            Vector128<float> V0 = MakeVectorE0E1(Q == 0u ? Z : 0ul, Q == 1u ? Z : 0ul); // Data in the first argument when Q == 0, in the second when Q == 1.
+            Vector128<float> V1 = MakeVectorE0E1(Q == 0u ? A : 0ul, Q == 1u ? A : 0ul);
+
+            int Rnd = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            int Fpcr = (int)RMode << (int)FPCR.RMode; // Rounding mode is fixed by the RMode parameter.
+            Fpcr |= Rnd & (1 << (int)FPCR.FZ);  // FZ, DN and AHP are each taken from the random word.
+            Fpcr |= Rnd & (1 << (int)FPCR.DN);
+            Fpcr |= Rnd & (1 << (int)FPCR.AHP);
+
+            AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, Fpcr: Fpcr); // NOTE(review): ThreadState is not read afterwards.
+
+            CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.OFC | FPSR.UFC | FPSR.IXC);
+        }
+
+        [Test, Pairwise] [Explicit] // Runs FCVTL (single -> double) on one opcode and compares the result against Unicorn.
+        public void F_Cvtl_V_2S2D_4S2D([ValueSource("_F_Cvtl_V_2S2D_4S2D_")] uint Opcodes,
+                                       [Values(0u)]     uint Rd,
+                                       [Values(1u, 0u)] uint Rn,
+                                       [ValueSource("_2S_F_")] ulong Z,
+                                       [ValueSource("_2S_F_")] ulong A,
+                                       [Values(0b0u, 0b1u)] uint Q, // <2S, 4S>
+                                       [Values(RMode.RN)] RMode RMode) // NOTE(review): RMode is not used in the body; no Fpcr is passed to SingleOpcode.
+        {
+            Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn goes into bits 9..5, Rd into bits 4..0.
+            Opcodes |= ((Q & 1) << 30); // Q goes into bit 30.
+
+            Vector128<float> V0 = MakeVectorE0E1(Q == 0u ? Z : 0ul, Q == 1u ? Z : 0ul); // Data in the first argument when Q == 0, in the second when Q == 1.
+            Vector128<float> V1 = MakeVectorE0E1(Q == 0u ? A : 0ul, Q == 1u ? A : 0ul);
+
+            AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); // NOTE(review): ThreadState is not read afterwards.
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise] [Explicit] // Runs FCVTN (single -> half) on one opcode and compares the result against Unicorn.
+        public void F_Cvtn_V_4S4H_4S8H([ValueSource("_F_Cvtn_V_4S4H_4S8H_")] uint Opcodes,
+                                       [Values(0u)]     uint Rd,
+                                       [Values(1u, 0u)] uint Rn,
+                                       [ValueSource("_2S_F_")] ulong Z,
+                                       [ValueSource("_2S_F_")] ulong A,
+                                       [Values(0b0u, 0b1u)] uint Q, // <4H, 8H>
+                                       [Values(RMode.RN)] RMode RMode) // Unicorn seems to default all rounding modes to RMode.RN.
+        {
+            Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn goes into bits 9..5, Rd into bits 4..0.
+            Opcodes |= ((Q & 1) << 30); // Q goes into bit 30.
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // The same 64-bit pattern is passed for both arguments, regardless of Q.
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+
+            int Rnd = (int)TestContext.CurrentContext.Random.NextUInt();
+
+            int Fpcr = (int)RMode << (int)FPCR.RMode; // Rounding mode is fixed by the RMode parameter.
+            Fpcr |= Rnd & (1 << (int)FPCR.FZ);  // FZ, DN and AHP are each taken from the random word.
+            Fpcr |= Rnd & (1 << (int)FPCR.DN);
+            Fpcr |= Rnd & (1 << (int)FPCR.AHP);
+
+            AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, Fpcr: Fpcr); // NOTE(review): ThreadState is not read afterwards.
+
+            CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.OFC | FPSR.UFC | FPSR.IXC | FPSR.IDC);
+        }
+
+        [Test, Pairwise] [Explicit] // Runs FCVTN (double -> single) on one opcode and compares the result against Unicorn.
+        public void F_Cvtn_V_2D2S_2D4S([ValueSource("_F_Cvtn_V_2D2S_2D4S_")] uint Opcodes,
+                                       [Values(0u)]     uint Rd,
+                                       [Values(1u, 0u)] uint Rn,
+                                       [ValueSource("_1D_F_")] ulong Z,
+                                       [ValueSource("_1D_F_")] ulong A,
+                                       [Values(0b0u, 0b1u)] uint Q, // <2S, 4S>
+                                       [Values(RMode.RN)] RMode RMode) // Unicorn seems to default all rounding modes to RMode.RN.
+        {
+            Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rn goes into bits 9..5, Rd into bits 4..0.
+            Opcodes |= ((Q & 1) << 30); // Q goes into bit 30.
+
+            Vector128<float> V0 = MakeVectorE0E1(Z, Z); // The same 64-bit pattern is passed for both arguments, regardless of Q.
+            Vector128<float> V1 = MakeVectorE0E1(A, A);
+
+            AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); // NOTE(review): RMode is not applied here (no Fpcr argument); ThreadState is not read afterwards.
+
+            CompareAgainstUnicorn();
+        }
+
         [Test, Pairwise] [Explicit]
         public void F_Recpx_Sqrt_S_S([ValueSource("_F_Recpx_Sqrt_S_S_")] uint Opcodes,
                                      [ValueSource("_1S_F_")] ulong A)
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs
deleted file mode 100644
index 3c8ad0711c..0000000000
--- a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs
+++ /dev/null
@@ -1,43 +0,0 @@
-using ChocolArm64.State;
-
-using NUnit.Framework;
-
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
-
-namespace Ryujinx.Tests.Cpu
-{
-    public class CpuTestSimdCvt : CpuTest
-    {
-        [TestCase((ushort)0x0000, 0x00000000u)] // Positive Zero
-        [TestCase((ushort)0x8000, 0x80000000u)] // Negative Zero
-        [TestCase((ushort)0x3E00, 0x3FC00000u)] // +1.5
-        [TestCase((ushort)0xBE00, 0xBFC00000u)] // -1.5
-        [TestCase((ushort)0xFFFF, 0xFFFFE000u)] // -QNaN
-        [TestCase((ushort)0x7C00, 0x7F800000u)] // +Inf
-        [TestCase((ushort)0x3C00, 0x3F800000u)] // 1.0
-        [TestCase((ushort)0x3C01, 0x3F802000u)] // 1.0009765625
-        [TestCase((ushort)0xC000, 0xC0000000u)] // -2.0
-        [TestCase((ushort)0x7BFF, 0x477FE000u)] // 65504.0 (Largest Normal)
-        [TestCase((ushort)0x03FF, 0x387FC000u)] // 0.00006097555 (Largest Subnormal)
-        [TestCase((ushort)0x0001, 0x33800000u)] // 5.96046448e-8 (Smallest Subnormal)
-        public void Fcvtl_V_f16(ushort Value, uint Result)
-        {
-            uint Opcode = 0x0E217801; // FCVTL V1.4S, V0.4H
-
-            Vector128<float> V0 = Sse.StaticCast<ushort, float>(Sse2.SetAllVector128(Value));
-
-            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0);
-
-            Assert.Multiple(() =>
-            {
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V1), (byte)0), Is.EqualTo(Result));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V1), (byte)1), Is.EqualTo(Result));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V1), (byte)2), Is.EqualTo(Result));
-                Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V1), (byte)3), Is.EqualTo(Result));
-            });
-
-            CompareAgainstUnicorn();
-        }
-    }
-}
diff --git a/Ryujinx.Tests/Ryujinx.Tests.csproj b/Ryujinx.Tests/Ryujinx.Tests.csproj
index 27c2bf7866..fb21c6863d 100644
--- a/Ryujinx.Tests/Ryujinx.Tests.csproj
+++ b/Ryujinx.Tests/Ryujinx.Tests.csproj
@@ -16,8 +16,8 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.8.0" />
-    <PackageReference Include="NUnit" Version="3.10.1" />
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.9.0" />
+    <PackageReference Include="NUnit" Version="3.11.0" />
     <PackageReference Include="NUnit3TestAdapter" Version="3.10.0" />
     <PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" />
   </ItemGroup>