From 4f293f8cbec33e8edce81ad4980bd532a2464c05 Mon Sep 17 00:00:00 2001
From: merry <git@mary.rs>
Date: Sun, 22 Jan 2023 14:15:49 +0000
Subject: [PATCH] Arm64: Simplify TryEncodeBitMask and use for constants
 (#4328)

* Arm64: Simplify TryEncodeBitMask

* CodeGenerator: Use TryEncodeBitMask in GenerateConstantCopy

* Ptc: Bump version
---
 ARMeilleure/ARMeilleure.csproj               |   6 +
 ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs   | 120 +++----------------
 ARMeilleure/CodeGen/Arm64/CodeGenerator.cs   |  14 ++-
 ARMeilleure/Translation/PTC/Ptc.cs           |   2 +-
 Ryujinx.Tests/Cpu/Arm64CodeGenCommonTests.cs |  46 +++++++
 5 files changed, 81 insertions(+), 107 deletions(-)
 create mode 100644 Ryujinx.Tests/Cpu/Arm64CodeGenCommonTests.cs

diff --git a/ARMeilleure/ARMeilleure.csproj b/ARMeilleure/ARMeilleure.csproj
index 58fd04b38b..1c2135ed54 100644
--- a/ARMeilleure/ARMeilleure.csproj
+++ b/ARMeilleure/ARMeilleure.csproj
@@ -16,4 +16,10 @@
     </ContentWithTargetPath>
   </ItemGroup>
 
+  <ItemGroup>
+    <AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
+      <_Parameter1>Ryujinx.Tests</_Parameter1>
+    </AssemblyAttribute>
+  </ItemGroup>
+
 </Project>
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs b/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
index e67d2fdb7f..8d1e597ba0 100644
--- a/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
+++ b/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
@@ -1,5 +1,4 @@
 using ARMeilleure.IntermediateRepresentation;
-using System;
 using System.Numerics;
 
 namespace ARMeilleure.CodeGen.Arm64
@@ -32,9 +31,12 @@ namespace ARMeilleure.CodeGen.Arm64
 
         public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
         {
-            ulong value = operand.Value;
+            return TryEncodeBitMask(operand.Type, operand.Value, out immN, out immS, out immR);
+        }
 
-            if (operand.Type == OperandType.I32)
+        public static bool TryEncodeBitMask(OperandType type, ulong value, out int immN, out int immS, out int immR)
+        {
+            if (type == OperandType.I32)
             {
                 value |= value << 32;
             }
@@ -50,7 +52,7 @@ namespace ARMeilleure.CodeGen.Arm64
             // Any value AND all ones will be equal itself, so it's effectively a no-op.
             // Any value OR all ones will be equal all ones, so one can just use MOV.
             // Any value XOR all ones will be equal its inverse, so one can just use MVN.
-            if (value == ulong.MaxValue)
+            if (value == 0 || value == ulong.MaxValue)
             {
                 immN = 0;
                 immS = 0;
@@ -59,79 +61,18 @@ namespace ARMeilleure.CodeGen.Arm64
                 return false;
             }
 
-            int bitLength = CountSequence(value);
+            // Normalize value, rotating it so that a run of ones starts exactly at bit 0 (LSB = 1, MSB = 0). This
+            // guarantees the low bits hold a complete element that is not split across the 64-bit word boundary.
+            int rotation = BitOperations.TrailingZeroCount(value & (value + 1));
+            ulong rotatedValue = ulong.RotateRight(value, rotation);
 
-            if ((value >> bitLength) != 0)
-            {
-                bitLength += CountSequence(value >> bitLength);
-            }
+            // Now that we have a complete element in the LSB with the LSB = 1, determine size and number of ones
+            // in element.
+            int elementSize = BitOperations.TrailingZeroCount(rotatedValue & (rotatedValue + 1));
+            int onesInElement = BitOperations.TrailingZeroCount(~rotatedValue);
 
-            int bitLengthLog2 = BitOperations.Log2((uint)bitLength);
-            int bitLengthPow2 = 1 << bitLengthLog2;
-
-            if (bitLengthPow2 < bitLength)
-            {
-                bitLengthLog2++;
-                bitLengthPow2 <<= 1;
-            }
-
-            int selectedESize = 64;
-            int repetitions = 1;
-            int onesCount = BitOperations.PopCount(value);
-
-            if (bitLengthPow2 < 64 && (value >> bitLengthPow2) != 0)
-            {
-                for (int eSizeLog2 = bitLengthLog2; eSizeLog2 < 6; eSizeLog2++)
-                {
-                    bool match = true;
-                    int eSize = 1 << eSizeLog2;
-                    ulong mask = (1UL << eSize) - 1;
-                    ulong eValue = value & mask;
-
-                    for (int e = 1; e < 64 / eSize; e++)
-                    {
-                        if (((value >> (e * eSize)) & mask) != eValue)
-                        {
-                            match = false;
-                            break;
-                        }
-                    }
-
-                    if (match)
-                    {
-                        selectedESize = eSize;
-                        repetitions = 64 / eSize;
-                        onesCount = BitOperations.PopCount(eValue);
-                        break;
-                    }
-                }
-            }
-
-            // Find rotation. We have two cases, one where the highest bit is 0
-            // and one where it is 1.
-            // If it's 1, we just need to count the number of 1 bits on the MSB to find the right rotation.
-            // If it's 0, we just need to count the number of 0 bits on the LSB to find the left rotation,
-            // then we can convert it to the right rotation shift by subtracting the value from the element size.
-            int rotation;
-            long vHigh = (long)(value << (64 - selectedESize));
-            if (vHigh < 0)
-            {
-                rotation = BitOperations.LeadingZeroCount(~(ulong)vHigh);
-            }
-            else
-            {
-                rotation = (selectedESize - BitOperations.TrailingZeroCount(value)) & (selectedESize - 1);
-            }
-
-            // Reconstruct value and see if it matches. If not, we can't encode.
-            ulong reconstructed = onesCount == 64 ? ulong.MaxValue : RotateRight((1UL << onesCount) - 1, rotation, selectedESize);
-
-            for (int bit = 32; bit >= selectedESize; bit >>= 1)
-            {
-                reconstructed |= reconstructed << bit;
-            }
-
-            if (reconstructed != value || onesCount == 0)
+            // Check the value is repeating; also ensures element size is a power of two.
+            if (ulong.RotateRight(value, elementSize) != value)
             {
                 immN = 0;
                 immS = 0;
@@ -140,34 +81,11 @@ namespace ARMeilleure.CodeGen.Arm64
                 return false;
             }
 
-            immR = rotation;
-
-            // immN indicates that there are no repetitions.
-            // The MSB of immS indicates the amount of repetitions, and the LSB the number of bits set.
-            if (repetitions == 1)
-            {
-                immN = 1;
-                immS = 0;
-            }
-            else
-            {
-                immN = 0;
-                immS = (0xf80 >> BitOperations.Log2((uint)repetitions)) & 0x3f;
-            }
-
-            immS |= onesCount - 1;
+            immN = (elementSize >> 6) & 1;
+            immS = (((~elementSize + 1) << 1) | (onesInElement - 1)) & 0x3f;
+            immR = (elementSize - rotation) & (elementSize - 1);
 
             return true;
         }
-
-        private static int CountSequence(ulong value)
-        {
-            return BitOperations.TrailingZeroCount(value) + BitOperations.TrailingZeroCount(~value);
-        }
-
-        private static ulong RotateRight(ulong bits, int shift, int size)
-        {
-            return (bits >> shift) | ((bits << (size - shift)) & (size == 64 ? ulong.MaxValue : (1UL << size) - 1));
-        }
     }
 }
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs b/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
index 704aa45acd..fc4fa976ea 100644
--- a/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
+++ b/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
@@ -1303,7 +1303,15 @@ namespace ARMeilleure.CodeGen.Arm64
 
         private static void GenerateConstantCopy(CodeGenContext context, Operand dest, ulong value)
         {
-            if (value != 0)
+            if (value == 0)
+            {
+                context.Assembler.Mov(dest, Register(ZrRegister, dest.Type));
+            }
+            else if (CodeGenCommon.TryEncodeBitMask(dest.Type, value, out _, out _, out _))
+            {
+                context.Assembler.Orr(dest, Register(ZrRegister, dest.Type), Const(dest.Type, (long)value));
+            }
+            else
             {
                 int hw = 0;
                 bool first = true;
@@ -1328,10 +1336,6 @@ namespace ARMeilleure.CodeGen.Arm64
                     value >>= 16;
                 }
             }
-            else
-            {
-                context.Assembler.Mov(dest, Register(ZrRegister, dest.Type));
-            }
         }
 
         private static void GenerateAtomicCas(
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index e5e0b2a541..aeb5868c94 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -29,7 +29,7 @@ namespace ARMeilleure.Translation.PTC
         private const string OuterHeaderMagicString = "PTCohd\0\0";
         private const string InnerHeaderMagicString = "PTCihd\0\0";
 
-        private const uint InternalVersion = 4272; //! To be incremented manually for each change to the ARMeilleure project.
+        private const uint InternalVersion = 4328; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string ActualDir = "0";
         private const string BackupDir = "1";
diff --git a/Ryujinx.Tests/Cpu/Arm64CodeGenCommonTests.cs b/Ryujinx.Tests/Cpu/Arm64CodeGenCommonTests.cs
new file mode 100644
index 0000000000..e16361bbcd
--- /dev/null
+++ b/Ryujinx.Tests/Cpu/Arm64CodeGenCommonTests.cs
@@ -0,0 +1,46 @@
+using ARMeilleure.CodeGen.Arm64;
+using NUnit.Framework;
+
+namespace Ryujinx.Tests.Cpu
+{
+    public class Arm64CodeGenCommonTests // Table-driven checks of CodeGenCommon.TryEncodeBitMask.
+    {
+        public struct TestCase // One input value together with the results expected for it.
+        {
+            public ulong Value; // Value handed to TryEncodeBitMask.
+            public bool Valid; // Expected return value of TryEncodeBitMask.
+            public int ImmN; // Expected immN output.
+            public int ImmS; // Expected immS output.
+            public int ImmR; // Expected immR output.
+        }
+
+        public static readonly TestCase[] TestCases = // Entries with Valid = false expect all three outputs to be 0.
+        {
+            new() { Value = 0, Valid = false, ImmN = 0, ImmS = 0, ImmR = 0 },
+            new() { Value = 0x970977f35f848714, Valid = false, ImmN = 0, ImmS = 0, ImmR = 0 },
+            new() { Value = 0xffffffffffffffff, Valid = false, ImmN = 0, ImmS = 0, ImmR = 0 },
+            new() { Value = 0x5555555555555555, Valid = true, ImmN = 0, ImmS = 0x3c, ImmR = 0 },
+            new() { Value = 0xaaaaaaaaaaaaaaaa, Valid = true, ImmN = 0, ImmS = 0x3c, ImmR = 1 },
+            new() { Value = 0x6666666666666666, Valid = true, ImmN = 0, ImmS = 0x39, ImmR = 3 },
+            new() { Value = 0x1c1c1c1c1c1c1c1c, Valid = true, ImmN = 0, ImmS = 0x32, ImmR = 6 },
+            new() { Value = 0x0f0f0f0f0f0f0f0f, Valid = true, ImmN = 0, ImmS = 0x33, ImmR = 0 },
+            new() { Value = 0xf1f1f1f1f1f1f1f1, Valid = true, ImmN = 0, ImmS = 0x34, ImmR = 4 },
+            new() { Value = 0xe7e7e7e7e7e7e7e7, Valid = true, ImmN = 0, ImmS = 0x35, ImmR = 3 },
+            new() { Value = 0xc001c001c001c001, Valid = true, ImmN = 0, ImmS = 0x22, ImmR = 2 },
+            new() { Value = 0x0000038000000380, Valid = true, ImmN = 0, ImmS = 0x02, ImmR = 25 },
+            new() { Value = 0xffff8fffffff8fff, Valid = true, ImmN = 0, ImmS = 0x1c, ImmR = 17 },
+            new() { Value = 0x000000000ffff800, Valid = true, ImmN = 1, ImmS = 0x10, ImmR = 53 },
+        };
+        
+        [Test]
+        public void BitImmTests([ValueSource(nameof(TestCases))] TestCase test) // Parameterized from TestCases.
+        {
+            bool valid = CodeGenCommon.TryEncodeBitMask(test.Value, out int immN, out int immS, out int immR);
+
+            Assert.That(valid, Is.EqualTo(test.Valid)); // Encodability must match the expectation.
+            Assert.That(immN, Is.EqualTo(test.ImmN)); // The three outputs are compared even when Valid is false.
+            Assert.That(immS, Is.EqualTo(test.ImmS));
+            Assert.That(immR, Is.EqualTo(test.ImmR));
+        }
+    }
+}
\ No newline at end of file