From 74da8785a5f3a79914182d384e966fb5d27fa708 Mon Sep 17 00:00:00 2001
From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
Date: Sun, 21 Apr 2019 04:07:35 +0200
Subject: [PATCH] Sse optimized the 32-bit Vector & Scalar integer-to-fp
 conversion instructions (signed & unsigned); added the related Gp & V_Fixed
 Tests (signed & unsigned). (#662)

* Update CpuTestSimdCvt.cs

* Update CpuTestSimd.cs

* Update CpuTestSimdShImm.cs

* Update InstEmitSimdCvt.cs

* Update OpCodeTable.cs

* Update InstEmitSimdCvt.cs
---
 ChocolArm64/Instructions/InstEmitSimdCvt.cs | 191 +++++++++++++++++---
 ChocolArm64/OpCodeTable.cs                  |   4 +
 Ryujinx.Tests/Cpu/CpuTestSimd.cs            |   4 +-
 Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs         | 106 ++++++++++-
 Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs       |  62 +++++++
 5 files changed, 342 insertions(+), 25 deletions(-)

diff --git a/ChocolArm64/Instructions/InstEmitSimdCvt.cs b/ChocolArm64/Instructions/InstEmitSimdCvt.cs
index ab2fb6a833..c5f16f86cb 100644
--- a/ChocolArm64/Instructions/InstEmitSimdCvt.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdCvt.cs
@@ -363,7 +363,7 @@ namespace ChocolArm64.Instructions
 
             if (context.CurrOp.RegisterSize == RegisterSize.Int32)
             {
-                context.Emit(OpCodes.Conv_U4);
+                context.Emit(OpCodes.Conv_I4);
             }
 
             EmitFloatCast(context, op.Size);
@@ -393,11 +393,20 @@ namespace ChocolArm64.Instructions
         {
             OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
 
-            EmitVectorExtractSx(context, op.Rn, 0, op.Size + 2);
+            int sizeF = op.Size & 1;
 
-            EmitFloatCast(context, op.Size);
+            if (Optimizations.UseSse2 && sizeF == 0)
+            {
+                EmitSse2cvtF_Signed(context, scalar: true);
+            }
+            else
+            {
+                EmitVectorExtractSx(context, op.Rn, 0, sizeF + 2);
 
-            EmitScalarSetF(context, op.Rd, op.Size);
+                EmitFloatCast(context, sizeF);
+
+                EmitScalarSetF(context, op.Rd, sizeF);
+            }
         }
 
         public static void Scvtf_V(ILEmitterCtx context)
@@ -408,18 +417,24 @@ namespace ChocolArm64.Instructions
 
             if (Optimizations.UseSse2 && sizeF == 0)
             {
-                Type[] typesCvt = new Type[] { typeof(Vector128<int>) };
+                EmitSse2cvtF_Signed(context, scalar: false);
+            }
+            else
+            {
+                EmitVectorCvtf(context, signed: true);
+            }
+        }
 
-                context.EmitLdvec(op.Rn);
+        public static void Scvtf_V_Fixed(ILEmitterCtx context)
+        {
+            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
 
-                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
+            // sizeF == ((OpCodeSimdShImm64)op).Size - 2
+            int sizeF = op.Size & 1;
 
-                context.EmitStvec(op.Rd);
-
-                if (op.RegisterSize == RegisterSize.Simd64)
-                {
-                    EmitVectorZeroUpper(context, op.Rd);
-                }
+            if (Optimizations.UseSse2 && sizeF == 0)
+            {
+                EmitSse2cvtF_Signed(context, scalar: false);
             }
             else
             {
@@ -469,18 +484,55 @@ namespace ChocolArm64.Instructions
         {
             OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
 
-            EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2);
+            int sizeF = op.Size & 1;
 
-            context.Emit(OpCodes.Conv_R_Un);
+            if (Optimizations.UseSse2 && sizeF == 0)
+            {
+                EmitSse2cvtF_Unsigned(context, scalar: true);
+            }
+            else
+            {
+                EmitVectorExtractZx(context, op.Rn, 0, sizeF + 2);
 
-            EmitFloatCast(context, op.Size);
+                context.Emit(OpCodes.Conv_R_Un);
 
-            EmitScalarSetF(context, op.Rd, op.Size);
+                EmitFloatCast(context, sizeF);
+
+                EmitScalarSetF(context, op.Rd, sizeF);
+            }
         }
 
         public static void Ucvtf_V(ILEmitterCtx context)
         {
-            EmitVectorCvtf(context, signed: false);
+            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+            int sizeF = op.Size & 1;
+            // sizeF == 0 takes the SSE2 emitter, which works on Vector128<float> only; other sizes use the generic path.
+            if (Optimizations.UseSse2 && sizeF == 0)
+            {
+                EmitSse2cvtF_Unsigned(context, scalar: false);
+            }
+            else
+            {
+                EmitVectorCvtf(context, signed: false);
+            }
+        }
+
+        public static void Ucvtf_V_Fixed(ILEmitterCtx context)
+        { // Emitter registered for the vector fixed-point UCVTF encodings (decoded as OpCodeSimdShImm64).
+            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+            // sizeF == ((OpCodeSimdShImm64)op).Size - 2
+            int sizeF = op.Size & 1;
+            // The SSE2 emitter detects the shift-immediate opcode itself and applies the 2^-fBits scaling.
+            if (Optimizations.UseSse2 && sizeF == 0)
+            {
+                EmitSse2cvtF_Unsigned(context, scalar: false);
+            }
+            else
+            {
+                EmitVectorCvtf(context, signed: false);
+            }
         }
 
         private static void EmitFcvtn(ILEmitterCtx context, bool signed, bool scalar)
@@ -838,7 +890,7 @@ namespace ChocolArm64.Instructions
                     int fBits = GetImmShr(fixedOp);
 
                     // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
-                    int fpScaled = 0x40000000 + (fBits - 1) * 0x800000;
+                    int fpScaled = 0x3F800000 + fBits * 0x800000;
 
                     context.EmitLdc_I4(fpScaled);
                     context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
@@ -894,7 +946,7 @@ namespace ChocolArm64.Instructions
                     int fBits = GetImmShr(fixedOp);
 
                     // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
-                    long fpScaled = 0x4000000000000000L + (fBits - 1) * 0x10000000000000L;
+                    long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
 
                     context.EmitLdc_I8(fpScaled);
                     context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
@@ -972,7 +1024,7 @@ namespace ChocolArm64.Instructions
                     int fBits = GetImmShr(fixedOp);
 
                     // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
-                    int fpScaled = 0x40000000 + (fBits - 1) * 0x800000;
+                    int fpScaled = 0x3F800000 + fBits * 0x800000;
 
                     context.EmitLdc_I4(fpScaled);
                     context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
@@ -1060,7 +1112,7 @@ namespace ChocolArm64.Instructions
                     int fBits = GetImmShr(fixedOp);
 
                     // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
-                    long fpScaled = 0x4000000000000000L + (fBits - 1) * 0x10000000000000L;
+                    long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
 
                     context.EmitLdc_I8(fpScaled);
                     context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
@@ -1158,6 +1210,101 @@ namespace ChocolArm64.Instructions
             }
         }
 
+        private static void EmitSse2cvtF_Signed(ILEmitterCtx context, bool scalar)
+        { // Emits the SSE2 path for signed 32-bit integer -> single-precision conversion of Rn into Rd.
+            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+            Type[] typesMul = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+            Type[] typesCvt = new Type[] { typeof(Vector128<int>) };
+            Type[] typesSav = new Type[] { typeof(int) };
+            // The whole Rn vector is converted in one call, including when only a scalar result is wanted.
+            context.EmitLdvec(op.Rn);
+
+            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
+            // Fixed-point forms (shift-immediate opcodes) additionally scale the result by 2^-fBits.
+            if (op is OpCodeSimdShImm64 fixedOp)
+            {
+                int fBits = GetImmShr(fixedOp);
+
+                // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
+                int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+                context.EmitLdc_I4(fpScaled);
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+                context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul));
+            }
+
+            context.EmitStvec(op.Rd);
+            // Post-store clean-up of Rd: scalar and 64-bit vector forms call a zeroing helper (exact ranges not visible here).
+            if (scalar)
+            {
+                EmitVectorZero32_128(context, op.Rd);
+            }
+            else if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                EmitVectorZeroUpper(context, op.Rd);
+            }
+        }
+
+        private static void EmitSse2cvtF_Unsigned(ILEmitterCtx context, bool scalar)
+        { // Emits the SSE2 path for unsigned 32-bit integer -> single-precision conversion of Rn into Rd.
+            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+            Type[] typesMulAdd = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+            Type[] typesSrlSll = new Type[] { typeof(Vector128<int>),   typeof(byte) };
+            Type[] typesCvt    = new Type[] { typeof(Vector128<int>) };
+            Type[] typesSav    = new Type[] { typeof(int) };
+            // The SSE2 conversion is signed, so each lane is split in 16-bit halves; first the high one: (Rn >> 16) * 65536.
+            context.EmitLdvec(op.Rn);
+
+            context.EmitLdc_I4(16);
+            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrlSll));
+
+            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
+
+            context.EmitLdc_I4(0x47800000); // 65536.0f (1 << 16)
+            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+            context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
+            // Then the low half: (Rn << 16) >> 16, converted the same way.
+            context.EmitLdvec(op.Rn);
+
+            context.EmitLdc_I4(16);
+            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSrlSll));
+
+            context.EmitLdc_I4(16);
+            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrlSll));
+
+            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
+            // Both partial results are summed to give the converted unsigned value.
+            context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
+            // Fixed-point forms (shift-immediate opcodes) additionally scale the result by 2^-fBits.
+            if (op is OpCodeSimdShImm64 fixedOp)
+            {
+                int fBits = GetImmShr(fixedOp);
+
+                // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
+                int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+                context.EmitLdc_I4(fpScaled);
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+                context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
+            }
+
+            context.EmitStvec(op.Rd);
+            // Post-store clean-up of Rd: scalar and 64-bit vector forms call a zeroing helper (exact ranges not visible here).
+            if (scalar)
+            {
+                EmitVectorZero32_128(context, op.Rd);
+            }
+            else if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                EmitVectorZeroUpper(context, op.Rd);
+            }
+        }
+
         private static string GetSse41NameRnd(RoundMode roundMode)
         {
             switch (roundMode)
diff --git a/ChocolArm64/OpCodeTable.cs b/ChocolArm64/OpCodeTable.cs
index 50bc6a1d6d..fb8b19cd19 100644
--- a/ChocolArm64/OpCodeTable.cs
+++ b/ChocolArm64/OpCodeTable.cs
@@ -439,6 +439,8 @@ namespace ChocolArm64
             SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstEmit.Scvtf_Gp_Fixed,  typeof(OpCodeSimdCvt64));
             SetA64("010111100x100001110110xxxxxxxxxx", InstEmit.Scvtf_S,         typeof(OpCodeSimd64));
             SetA64("0>0011100<100001110110xxxxxxxxxx", InstEmit.Scvtf_V,         typeof(OpCodeSimd64));
+            SetA64("0x001111001xxxxx111001xxxxxxxxxx", InstEmit.Scvtf_V_Fixed,   typeof(OpCodeSimdShImm64));
+            SetA64("0100111101xxxxxx111001xxxxxxxxxx", InstEmit.Scvtf_V_Fixed,   typeof(OpCodeSimdShImm64));
             SetA64("01011110000xxxxx000000xxxxxxxxxx", InstEmit.Sha1c_V,         typeof(OpCodeSimdReg64));
             SetA64("0101111000101000000010xxxxxxxxxx", InstEmit.Sha1h_V,         typeof(OpCodeSimd64));
             SetA64("01011110000xxxxx001000xxxxxxxxxx", InstEmit.Sha1m_V,         typeof(OpCodeSimdReg64));
@@ -548,6 +550,8 @@ namespace ChocolArm64
             SetA64(">00111100x000011>xxxxxxxxxxxxxxx", InstEmit.Ucvtf_Gp_Fixed,  typeof(OpCodeSimdCvt64));
             SetA64("011111100x100001110110xxxxxxxxxx", InstEmit.Ucvtf_S,         typeof(OpCodeSimd64));
             SetA64("0>1011100<100001110110xxxxxxxxxx", InstEmit.Ucvtf_V,         typeof(OpCodeSimd64));
+            SetA64("0x101111001xxxxx111001xxxxxxxxxx", InstEmit.Ucvtf_V_Fixed,   typeof(OpCodeSimdShImm64));
+            SetA64("0110111101xxxxxx111001xxxxxxxxxx", InstEmit.Ucvtf_V_Fixed,   typeof(OpCodeSimdShImm64));
             SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstEmit.Uhadd_V,         typeof(OpCodeSimdReg64));
             SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstEmit.Uhsub_V,         typeof(OpCodeSimdReg64));
             SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstEmit.Umax_V,          typeof(OpCodeSimdReg64));
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
index fd395da8ef..df23f2eff8 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@@ -2082,7 +2082,7 @@ namespace Ryujinx.Tests.Cpu
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
-            CompareAgainstUnicorn();
+            CompareAgainstUnicorn(fpTolerances: FpTolerances.UpToOneUlpsD); // unsigned
         }
 
         [Test, Pairwise] [Explicit]
@@ -2118,7 +2118,7 @@ namespace Ryujinx.Tests.Cpu
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
-            CompareAgainstUnicorn();
+            CompareAgainstUnicorn(fpTolerances: FpTolerances.UpToOneUlpsD); // unsigned
         }
 
         [Test, Pairwise]
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs
index 60935488ef..ff8e802708 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs
@@ -288,6 +288,42 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
+        private static uint[] _SU_Cvt_F_Gp_WS_()
+        { // Base encodings with Rd = Rn = 0 (W source, S destination); the test ORs the register numbers in.
+            return new uint[]
+            {
+                0x1E220000u, // SCVTF S0, W0
+                0x1E230000u  // UCVTF S0, W0
+            };
+        }
+
+        private static uint[] _SU_Cvt_F_Gp_WD_()
+        { // Base encodings with Rd = Rn = 0 (W source, D destination); the test ORs the register numbers in.
+            return new uint[]
+            {
+                0x1E620000u, // SCVTF D0, W0
+                0x1E630000u  // UCVTF D0, W0
+            };
+        }
+
+        private static uint[] _SU_Cvt_F_Gp_XS_()
+        { // Base encodings with Rd = Rn = 0 (X source, S destination); the test ORs the register numbers in.
+            return new uint[]
+            {
+                0x9E220000u, // SCVTF S0, X0
+                0x9E230000u  // UCVTF S0, X0
+            };
+        }
+
+        private static uint[] _SU_Cvt_F_Gp_XD_()
+        { // Base encodings with Rd = Rn = 0 (X source, D destination); the test ORs the register numbers in.
+            return new uint[]
+            {
+                0x9E620000u, // SCVTF D0, X0
+                0x9E630000u  // UCVTF D0, X0
+            };
+        }
+
         private static uint[] _SU_Cvt_F_Gp_Fixed_WS_()
         {
             return new uint[]
@@ -480,6 +516,74 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise] [Explicit]
+        public void SU_Cvt_F_Gp_WS([ValueSource("_SU_Cvt_F_Gp_WS_")] uint opcodes,
+                                   [Values(0u)]      uint rd,
+                                   [Values(1u, 31u)] uint rn,
+                                   [ValueSource("_W_")] [Random(RndCnt)] uint wn)
+        { // SCVTF/UCVTF Sd, Wn: runs one opcode and compares with Unicorn (no fpTolerances argument is passed).
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
+            // Random values are placed in x31 and in V0 (the destination, since rd is always 0).
+            uint  w31 = TestContext.CurrentContext.Random.NextUInt();
+            ulong z   = TestContext.CurrentContext.Random.NextULong();
+            Vector128<float> v0 = MakeVectorE0E1(z, z);
+
+            SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise] [Explicit]
+        public void SU_Cvt_F_Gp_WD([ValueSource("_SU_Cvt_F_Gp_WD_")] uint opcodes,
+                                   [Values(0u)]      uint rd,
+                                   [Values(1u, 31u)] uint rn,
+                                   [ValueSource("_W_")] [Random(RndCnt)] uint wn)
+        { // SCVTF/UCVTF Dd, Wn: runs one opcode and compares with Unicorn (no fpTolerances argument is passed).
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
+            // Random values go into x31 and V0 (the destination); MakeVectorE1 presumably fills only element 1 - confirm.
+            uint  w31 = TestContext.CurrentContext.Random.NextUInt();
+            ulong z   = TestContext.CurrentContext.Random.NextULong();
+            Vector128<float> v0 = MakeVectorE1(z);
+
+            SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise] [Explicit]
+        public void SU_Cvt_F_Gp_XS([ValueSource("_SU_Cvt_F_Gp_XS_")] uint opcodes,
+                                   [Values(0u)]      uint rd,
+                                   [Values(1u, 31u)] uint rn,
+                                   [ValueSource("_X_")] [Random(RndCnt)] ulong xn)
+        { // SCVTF/UCVTF Sd, Xn: runs one opcode and compares with Unicorn (no fpTolerances argument is passed).
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
+            // Random values are placed in x31 and in V0 (the destination, since rd is always 0).
+            ulong x31 = TestContext.CurrentContext.Random.NextULong();
+            ulong z   = TestContext.CurrentContext.Random.NextULong();
+            Vector128<float> v0 = MakeVectorE0E1(z, z);
+
+            SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise] [Explicit]
+        public void SU_Cvt_F_Gp_XD([ValueSource("_SU_Cvt_F_Gp_XD_")] uint opcodes,
+                                   [Values(0u)]      uint rd,
+                                   [Values(1u, 31u)] uint rn,
+                                   [ValueSource("_X_")] [Random(RndCnt)] ulong xn)
+        { // SCVTF/UCVTF Dd, Xn: runs one opcode and compares with Unicorn using FpTolerances.UpToOneUlpsD.
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
+            // Random values go into x31 and V0 (the destination); MakeVectorE1 presumably fills only element 1 - confirm.
+            ulong x31 = TestContext.CurrentContext.Random.NextULong();
+            ulong z   = TestContext.CurrentContext.Random.NextULong();
+            Vector128<float> v0 = MakeVectorE1(z);
+
+            SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0);
+
+            CompareAgainstUnicorn(fpTolerances: FpTolerances.UpToOneUlpsD); // unsigned
+        }
+
         [Test, Pairwise] [Explicit]
         public void SU_Cvt_F_Gp_Fixed_WS([ValueSource("_SU_Cvt_F_Gp_Fixed_WS_")] uint opcodes,
                                          [Values(0u)]      uint rd,
@@ -561,7 +665,7 @@ namespace Ryujinx.Tests.Cpu
 
             SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0);
 
-            CompareAgainstUnicorn();
+            CompareAgainstUnicorn(fpTolerances: FpTolerances.UpToOneUlpsD); // unsigned
         }
 #endif
     }
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs
index cabaac0220..c08949a5ae 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs
@@ -194,6 +194,24 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
+        private static uint[] _SU_Cvt_F_V_Fixed_2S_4S_()
+        { // Base encodings (Rd = Rn = 0, Q = 0, #32); the test ORs in the registers, the immh:immb field and Q.
+            return new uint[]
+            {
+                0x0F20E400u, // SCVTF V0.2S, V0.2S, #32
+                0x2F20E400u  // UCVTF V0.2S, V0.2S, #32
+            };
+        }
+
+        private static uint[] _SU_Cvt_F_V_Fixed_2D_()
+        { // Base encodings (Rd = Rn = 0, #64); the test ORs in the registers and the immh:immb field.
+            return new uint[]
+            {
+                0x4F40E400u, // SCVTF V0.2D, V0.2D, #64
+                0x6F40E400u  // UCVTF V0.2D, V0.2D, #64
+            };
+        }
+
         private static uint[] _SU_Shll_V_8B8H_16B8H_()
         {
             return new uint[]
@@ -454,6 +472,50 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise] [Explicit]
+        public void SU_Cvt_F_V_Fixed_2S_4S([ValueSource("_SU_Cvt_F_V_Fixed_2S_4S_")] uint opcodes,
+                                           [Values(0u)]     uint rd,
+                                           [Values(1u, 0u)] uint rn,
+                                           [ValueSource("_2S_")] [Random(RndCnt)] ulong z,
+                                           [ValueSource("_2S_")] [Random(RndCnt)] ulong a,
+                                           [Values(1u, 32u)] [Random(2u, 31u, RndCntFBits)] uint fBits,
+                                           [Values(0b0u, 0b1u)] uint q) // <2S, 4S>
+        { // SCVTF/UCVTF (vector, fixed-point) on 32-bit lanes, for both the 2S (q = 0) and 4S (q = 1) arrangements.
+            uint immHb = (64 - fBits) & 0x7F;
+            // The 7-bit immHb value (64 - fBits for 32-bit lanes) is placed at opcode bits 16..22; q goes to bit 30.
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
+            opcodes |= (immHb << 16);
+            opcodes |= ((q & 1) << 30);
+            // V0 (the destination) is pre-filled with z; with q == 0 the upper half of V1 is zero (a * q).
+            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+
+            SingleOpcode(opcodes, v0: v0, v1: v1);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise] [Explicit]
+        public void SU_Cvt_F_V_Fixed_2D([ValueSource("_SU_Cvt_F_V_Fixed_2D_")] uint opcodes,
+                                        [Values(0u)]     uint rd,
+                                        [Values(1u, 0u)] uint rn,
+                                        [ValueSource("_1D_")] [Random(RndCnt)] ulong z,
+                                        [ValueSource("_1D_")] [Random(RndCnt)] ulong a,
+                                        [Values(1u, 64u)] [Random(2u, 63u, RndCntFBits)] uint fBits)
+        { // SCVTF/UCVTF (vector, fixed-point) on 64-bit lanes (2D); compared with Unicorn using FpTolerances.UpToOneUlpsD.
+            uint immHb = (128 - fBits) & 0x7F;
+            // The 7-bit immHb value (128 - fBits for 64-bit lanes) is placed at opcode bits 16..22.
+            opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
+            opcodes |= (immHb << 16);
+            // V0 (the destination, rd is always 0) is pre-filled with z; V1 holds the value a in both elements.
+            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            Vector128<float> v1 = MakeVectorE0E1(a, a);
+
+            SingleOpcode(opcodes, v0: v0, v1: v1);
+
+            CompareAgainstUnicorn(fpTolerances: FpTolerances.UpToOneUlpsD); // unsigned
+        }
+
         [Test, Pairwise, Description("SHL <V><d>, <V><n>, #<shift>")]
         public void Shl_S_D([Values(0u)]     uint rd,
                             [Values(1u, 0u)] uint rn,