From 2be8b6ea4527239fff1b2fdf9dc7ce1346b173e6 Mon Sep 17 00:00:00 2001
From: Domenico V <35856442+LDj3SNuD@users.noreply.github.com>
Date: Mon, 31 Jul 2023 01:57:37 +0200
Subject: [PATCH] CPU (A64): Add Fmaxp & Fminp Scalar Inst.s, Fast & Slow
 Paths; with Tests. (#5502)

* Add the Fmaxp & Fminp scalar instructions (fast & slow paths), with tests.

* Ptc.InternalVersion = 5502
---
 src/ARMeilleure/Decoders/OpCodeTable.cs       |  2 +
 .../Instructions/InstEmitSimdArithmetic.cs    | 50 +++++++++++++++++++
 src/ARMeilleure/Instructions/InstName.cs      |  2 +
 src/ARMeilleure/Translation/PTC/Ptc.cs        |  2 +-
 src/Ryujinx.Tests/Cpu/CpuTestSimd.cs          |  4 ++
 5 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/src/ARMeilleure/Decoders/OpCodeTable.cs b/src/ARMeilleure/Decoders/OpCodeTable.cs
index 5cfd0bb81e..9e13bd9b56 100644
--- a/src/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/src/ARMeilleure/Decoders/OpCodeTable.cs
@@ -330,6 +330,7 @@ namespace ARMeilleure.Decoders
             SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S,       InstEmit.Fmaxnmp_S,       OpCodeSimd.Create);
             SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V,       InstEmit.Fmaxnmp_V,       OpCodeSimdReg.Create);
             SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V,       InstEmit.Fmaxnmv_V,       OpCodeSimd.Create);
+            SetA64("011111100x110000111110xxxxxxxxxx", InstName.Fmaxp_S,         InstEmit.Fmaxp_S,         OpCodeSimd.Create);
             SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V,         InstEmit.Fmaxp_V,         OpCodeSimdReg.Create);
             SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V,         InstEmit.Fmaxv_V,         OpCodeSimd.Create);
             SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S,          InstEmit.Fmin_S,          OpCodeSimdReg.Create);
@@ -339,6 +340,7 @@ namespace ARMeilleure.Decoders
             SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S,       InstEmit.Fminnmp_S,       OpCodeSimd.Create);
             SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V,       InstEmit.Fminnmp_V,       OpCodeSimdReg.Create);
             SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V,       InstEmit.Fminnmv_V,       OpCodeSimd.Create);
+            SetA64("011111101x110000111110xxxxxxxxxx", InstName.Fminp_S,         InstEmit.Fminp_S,         OpCodeSimd.Create);
             SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V,         InstEmit.Fminp_V,         OpCodeSimdReg.Create);
             SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V,         InstEmit.Fminv_V,         OpCodeSimd.Create);
             SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se,         InstEmit.Fmla_Se,         OpCodeSimdRegElemF.Create);
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index 7b308fa964..543aab0236 100644
--- a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -883,6 +883,31 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        // FMAXP (scalar form): emits the pairwise floating-point maximum.
+        // Strategies are tried in order: AdvSimd intrinsic, SSE4.1 (FastFP), soft-float call.
+        public static void Fmaxp_S(ArmEmitterContext context)
+        {
+            if (Optimizations.UseAdvSimd)
+            {
+                // AdvSimd enabled: emit the Arm64FmaxpS intrinsic directly.
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxpS);
+                return;
+            }
+
+            if (Optimizations.FastFP && Optimizations.UseSse41)
+            {
+                // FastFP with SSE4.1: the max helper is wrapped by the NaN-processing helper.
+                EmitSse2ScalarPairwiseOpF(context, (lhs, rhs) =>
+                    EmitSse41ProcessNaNsOpF(context, (a, b) =>
+                        EmitSse2VectorMaxMinOpF(context, a, b, isMax: true), scalar: true, lhs, rhs));
+                return;
+            }
+
+            // Neither fast path applies: combine the pair through the soft-float FPMax call.
+            EmitScalarPairwiseOpF(context, (lhs, rhs) =>
+                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), lhs, rhs));
+        }
+
         public static void Fmaxp_V(ArmEmitterContext context)
         {
             if (Optimizations.UseAdvSimd)
@@ -1081,6 +1106,31 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        // FMINP (scalar form): emits the pairwise floating-point minimum.
+        // Strategies are tried in order: AdvSimd intrinsic, SSE4.1 (FastFP), soft-float call.
+        public static void Fminp_S(ArmEmitterContext context)
+        {
+            if (Optimizations.UseAdvSimd)
+            {
+                // AdvSimd enabled: emit the Arm64FminpS intrinsic directly.
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminpS);
+                return;
+            }
+
+            if (Optimizations.FastFP && Optimizations.UseSse41)
+            {
+                // FastFP with SSE4.1: the min helper is wrapped by the NaN-processing helper.
+                EmitSse2ScalarPairwiseOpF(context, (lhs, rhs) =>
+                    EmitSse41ProcessNaNsOpF(context, (a, b) =>
+                        EmitSse2VectorMaxMinOpF(context, a, b, isMax: false), scalar: true, lhs, rhs));
+                return;
+            }
+
+            // Neither fast path applies: combine the pair through the soft-float FPMin call.
+            EmitScalarPairwiseOpF(context, (lhs, rhs) =>
+                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), lhs, rhs));
+        }
+
         public static void Fminp_V(ArmEmitterContext context)
         {
             if (Optimizations.UseAdvSimd)
diff --git a/src/ARMeilleure/Instructions/InstName.cs b/src/ARMeilleure/Instructions/InstName.cs
index fd71d92e67..32ae38dad3 100644
--- a/src/ARMeilleure/Instructions/InstName.cs
+++ b/src/ARMeilleure/Instructions/InstName.cs
@@ -228,6 +228,7 @@ namespace ARMeilleure.Instructions
         Fmaxnmp_S,
         Fmaxnmp_V,
         Fmaxnmv_V,
+        Fmaxp_S,
         Fmaxp_V,
         Fmaxv_V,
         Fmin_S,
@@ -237,6 +238,7 @@ namespace ARMeilleure.Instructions
         Fminnmp_S,
         Fminnmp_V,
         Fminnmv_V,
+        Fminp_S,
         Fminp_V,
         Fminv_V,
         Fmla_Se,
diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs
index 14d4e471fc..ce653383e6 100644
--- a/src/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/src/ARMeilleure/Translation/PTC/Ptc.cs
@@ -29,7 +29,7 @@ namespace ARMeilleure.Translation.PTC
         private const string OuterHeaderMagicString = "PTCohd\0\0";
         private const string InnerHeaderMagicString = "PTCihd\0\0";
 
-        private const uint InternalVersion = 5343; //! To be incremented manually for each change to the ARMeilleure project.
+        private const uint InternalVersion = 5502; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string ActualDir = "0";
         private const string BackupDir = "1";
diff --git a/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs
index 4c568a8f4b..eb763618d9 100644
--- a/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@@ -764,7 +764,9 @@ namespace Ryujinx.Tests.Cpu
             {
                 0x7E30D820u, // FADDP   S0, V1.2S
                 0x7E30C820u, // FMAXNMP S0, V1.2S
+                0x7E30F820u, // FMAXP   S0, V1.2S
                 0x7EB0C820u, // FMINNMP S0, V1.2S
+                0x7EB0F820u, // FMINP   S0, V1.2S
             };
         }
 
@@ -774,7 +776,9 @@ namespace Ryujinx.Tests.Cpu
             {
                 0x7E70D820u, // FADDP   D0, V1.2D
                 0x7E70C820u, // FMAXNMP D0, V1.2D
+                0x7E70F820u, // FMAXP   D0, V1.2D
                 0x7EF0C820u, // FMINNMP D0, V1.2D
+                0x7EF0F820u, // FMINP   D0, V1.2D
             };
         }