From 312be74861dae16311f4376e32195f8a4fd372c6 Mon Sep 17 00:00:00 2001
From: FICTURE7 <FICTURE7@gmail.com>
Date: Wed, 29 Sep 2021 03:38:37 +0400
Subject: [PATCH] Optimize `HybridAllocator` (#2637)

* Store constant `Operand`s in the `LocalInfo`

Since the spill slot and register assigned is fixed, we can just store
the `Operand` reference in the `LocalInfo` struct. This allows skipping
hitting the intern-table for a look up.

* Skip `Uses`/`Assignments` management

Since the `HybridAllocator` is the last pass and we do not care about
uses/assignments we can skip managing that when setting destinations or
sources.

* Make `GetLocalInfo` inlineable

Also fix a possible issue where with numbered locals. See or-assignment
operator in `SetVisited(local)` before patch.

* Do not run `BlockPlacement` in LCQ

With the host mapped memory manager, there is a lot less cold code to
split from hot code. So disabling this in LCQ gives some extra
throughput - where we need it.

* Address Mou-Ikkai's feedback

* Apply suggestions from code review

Co-authored-by: VocalFan <45863583+Mou-Ikkai@users.noreply.github.com>

* Move check to an assert

Co-authored-by: VocalFan <45863583+Mou-Ikkai@users.noreply.github.com>
---
 .../RegisterAllocators/HybridAllocator.cs     | 135 +++++++++---------
 ARMeilleure/CodeGen/X86/CodeGenerator.cs      |  16 ++-
 .../IntermediateRepresentation/Operand.cs     |   1 +
 .../IntermediateRepresentation/Operation.cs   |  16 +--
 4 files changed, 85 insertions(+), 83 deletions(-)

diff --git a/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
index 7b339efb3d..de0840265f 100644
--- a/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
+++ b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
@@ -3,6 +3,7 @@ using ARMeilleure.Translation;
 using System;
 using System.Diagnostics;
 using System.Numerics;
+using System.Runtime.CompilerServices;
 using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
 using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
 
@@ -10,9 +11,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
 {
     class HybridAllocator : IRegisterAllocator
     {
-        private const int RegistersCount = 16;
-        private const int MaxIROperands  = 4;
-
         private struct BlockInfo
         {
             public bool HasCall { get; }
@@ -32,10 +30,10 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
         {
             public int Uses { get; set; }
             public int UsesAllocated { get; set; }
-            public int Register { get; set; }
-            public int SpillOffset { get; set; }
             public int Sequence { get; set; }
             public Operand Temp { get; set; }
+            public Operand Register { get; set; }
+            public Operand SpillOffset { get; set; }
             public OperandType Type { get; }
 
             private int _first;
@@ -49,10 +47,10 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 Type = type;
 
                 UsesAllocated = 0;
-                Register = 0;
-                SpillOffset = 0;
                 Sequence = 0;
                 Temp = default;
+                Register = default;
+                SpillOffset = default;
 
                 _first = -1;
                 _last  = -1;
@@ -74,6 +72,38 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             }
         }
 
+        private const int MaxIROperands = 4;
+        // The "visited" state is stored in the MSB of the local's value.
+        private const ulong VisitedMask = 1ul << 63;
+
+        private BlockInfo[] _blockInfo;
+        private LocalInfo[] _localInfo;
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static bool IsVisited(Operand local)
+        {
+            Debug.Assert(local.Kind == OperandKind.LocalVariable);
+
+            return (local.GetValueUnsafe() & VisitedMask) != 0;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static void SetVisited(Operand local)
+        {
+            Debug.Assert(local.Kind == OperandKind.LocalVariable);
+
+            local.GetValueUnsafe() |= VisitedMask;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private ref LocalInfo GetLocalInfo(Operand local)
+        {
+            Debug.Assert(local.Kind == OperandKind.LocalVariable);
+            Debug.Assert(IsVisited(local), "Local variable not visited. Used before defined?");
+
+            return ref _localInfo[(uint)local.GetValueUnsafe() - 1];
+        }
+
         public AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks)
         {
             int intUsedRegisters = 0;
@@ -82,35 +112,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             int intFreeRegisters = regMasks.IntAvailableRegisters;
             int vecFreeRegisters = regMasks.VecAvailableRegisters;
 
-            var blockInfo = new BlockInfo[cfg.Blocks.Count];
-            var localInfo = new LocalInfo[cfg.Blocks.Count * 3];
+            _blockInfo = new BlockInfo[cfg.Blocks.Count];
+            _localInfo = new LocalInfo[cfg.Blocks.Count * 3];
+
             int localInfoCount = 0;
 
-            // The "visited" state is stored in the MSB of the local's value.
-            const ulong VisitedMask = 1ul << 63;
-
-            bool IsVisited(Operand local)
-            {
-                return (local.GetValueUnsafe() & VisitedMask) != 0;
-            }
-
-            void SetVisited(Operand local)
-            {
-                local.GetValueUnsafe() |= VisitedMask | (uint)++localInfoCount;
-            }
-
-            ref LocalInfo GetLocalInfo(Operand local)
-            {
-                Debug.Assert(local.Kind == OperandKind.LocalVariable);
-
-                if (!IsVisited(local))
-                {
-                    throw new InvalidOperationException("Local was not visisted yet. Used before defined?");
-                }
-
-                return ref localInfo[(uint)local.GetValueUnsafe() - 1];
-            }
-
             for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
             {
                 BasicBlock block = cfg.PostOrderBlocks[index];
@@ -127,10 +133,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                         hasCall = true;
                     }
 
-                    for (int i = 0; i < node.SourcesCount; i++)
+                    foreach (Operand source in node.SourcesUnsafe)
                     {
-                        Operand source = node.GetSource(i);
-
                         if (source.Kind == OperandKind.LocalVariable)
                         {
                             GetLocalInfo(source).SetBlockIndex(block.Index);
@@ -151,10 +155,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                         }
                     }
 
-                    for (int i = 0; i < node.DestinationsCount; i++)
+                    foreach (Operand dest in node.DestinationsUnsafe)
                     {
-                        Operand dest = node.GetDestination(i);
-
                         if (dest.Kind == OperandKind.LocalVariable)
                         {
                             if (IsVisited(dest))
@@ -163,13 +165,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                             }
                             else
                             {
-                                SetVisited(dest);
+                                dest.NumberLocal(++localInfoCount);
 
-                                if (localInfoCount > localInfo.Length)
+                                if (localInfoCount > _localInfo.Length)
                                 {
-                                    Array.Resize(ref localInfo, localInfoCount * 2);
+                                    Array.Resize(ref _localInfo, localInfoCount * 2);
                                 }
 
+                                SetVisited(dest);
                                 GetLocalInfo(dest) = new LocalInfo(dest.Type, UsesCount(dest), block.Index);
                             }
                         }
@@ -187,7 +190,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                     }
                 }
 
-                blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
+                _blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
             }
 
             int sequence = 0;
@@ -196,7 +199,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             {
                 BasicBlock block = cfg.PostOrderBlocks[index];
 
-                BlockInfo blkInfo = blockInfo[block.Index];
+                ref BlockInfo blkInfo = ref _blockInfo[block.Index];
 
                 int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters;
                 int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters;
@@ -227,23 +230,23 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
 
                         Debug.Assert(info.UsesAllocated <= info.Uses);
 
-                        if (info.Register != -1)
+                        if (info.Register != default)
                         {
-                            Operand reg = Register(info.Register, local.Type.ToRegisterType(), local.Type);
-
                             if (info.UsesAllocated == info.Uses)
                             {
+                                Register reg = info.Register.GetRegister();
+
                                 if (local.Type.IsInteger())
                                 {
-                                    intLocalFreeRegisters |= 1 << info.Register;
+                                    intLocalFreeRegisters |= 1 << reg.Index;
                                 }
                                 else
                                 {
-                                    vecLocalFreeRegisters |= 1 << info.Register;
+                                    vecLocalFreeRegisters |= 1 << reg.Index;
                                 }
                             }
 
-                            return reg;
+                            return info.Register;
                         }
                         else
                         {
@@ -259,7 +262,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                                 info.Temp = temp;
                             }
 
-                            Operation fillOp = Operation(Instruction.Fill, temp, Const(info.SpillOffset));
+                            Operation fillOp = Operation(Instruction.Fill, temp, info.SpillOffset);
 
                             block.Operations.AddBefore(node, fillOp);
 
@@ -279,9 +282,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                         {
                             ref LocalInfo info = ref GetLocalInfo(source);
 
-                            if (info.Register == -1)
+                            if (info.Register == default)
                             {
-                                Operation fillOp = Operation(Instruction.Fill, node.Destination, Const(info.SpillOffset));
+                                Operation fillOp = Operation(Instruction.Fill, node.Destination, info.SpillOffset);
 
                                 block.Operations.AddBefore(node, fillOp);
                                 block.Operations.Remove(node);
@@ -295,13 +298,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
 
                     if (!folded)
                     {
-                        for (int i = 0; i < node.SourcesCount; i++)
+                        foreach (ref Operand source in node.SourcesUnsafe)
                         {
-                            Operand source = node.GetSource(i);
-
                             if (source.Kind == OperandKind.LocalVariable)
                             {
-                                node.SetSource(i, AllocateRegister(source));
+                                source = AllocateRegister(source);
                             }
                             else if (source.Kind == OperandKind.Memory)
                             {
@@ -323,10 +324,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                     int intLocalAsg = 0;
                     int vecLocalAsg = 0;
 
-                    for (int i = 0; i < node.DestinationsCount; i++)
+                    foreach (ref Operand dest in node.DestinationsUnsafe)
                     {
-                        Operand dest = node.GetDestination(i);
-
                         if (dest.Kind != OperandKind.LocalVariable)
                         {
                             continue;
@@ -344,7 +343,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                             {
                                 int selectedReg = BitOperations.TrailingZeroCount(mask);
 
-                                info.Register = selectedReg;
+                                info.Register = Register(selectedReg, info.Type.ToRegisterType(), info.Type);
 
                                 if (dest.Type.IsInteger())
                                 {
@@ -359,8 +358,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                             }
                             else
                             {
-                                info.Register    = -1;
-                                info.SpillOffset = stackAlloc.Allocate(dest.Type.GetSizeInBytes());
+                                info.Register    = default;
+                                info.SpillOffset = Const(stackAlloc.Allocate(dest.Type.GetSizeInBytes()));
                             }
                         }
 
@@ -368,9 +367,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
 
                         Debug.Assert(info.UsesAllocated <= info.Uses);
 
-                        if (info.Register != -1)
+                        if (info.Register != default)
                         {
-                            node.SetDestination(i, Register(info.Register, dest.Type.ToRegisterType(), dest.Type));
+                            dest = info.Register;
                         }
                         else
                         {
@@ -386,9 +385,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                                 info.Temp     = temp;
                             }
 
-                            node.SetDestination(i, temp);
+                            dest = temp;
 
-                            Operation spillOp = Operation(Instruction.Spill, default, Const(info.SpillOffset), temp);
+                            Operation spillOp = Operation(Instruction.Spill, default, info.SpillOffset, temp);
 
                             block.Operations.AddAfter(node, spillOp);
 
diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
index 924c113c4f..ba2df8020a 100644
--- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs
+++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -97,16 +97,18 @@ namespace ARMeilleure.CodeGen.X86
 
             Logger.StartPass(PassName.Optimization);
 
-            if ((cctx.Options & CompilerOptions.SsaForm)  != 0 &&
-                (cctx.Options & CompilerOptions.Optimize) != 0)
+            if (cctx.Options.HasFlag(CompilerOptions.Optimize))
             {
-                Optimizer.RunPass(cfg);
+                if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+                {
+                    Optimizer.RunPass(cfg);
+                }
+
+                BlockPlacement.RunPass(cfg);
             }
 
             X86Optimizer.RunPass(cfg);
 
-            BlockPlacement.RunPass(cfg);
-
             Logger.EndPass(PassName.Optimization, cfg);
 
             Logger.StartPass(PassName.PreAllocation);
@@ -119,14 +121,14 @@ namespace ARMeilleure.CodeGen.X86
 
             Logger.StartPass(PassName.RegisterAllocation);
 
-            if ((cctx.Options & CompilerOptions.SsaForm) != 0)
+            if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
             {
                 Ssa.Deconstruct(cfg);
             }
 
             IRegisterAllocator regAlloc;
 
-            if ((cctx.Options & CompilerOptions.Lsra) != 0)
+            if (cctx.Options.HasFlag(CompilerOptions.Lsra))
             {
                 regAlloc = new LinearScanAllocator();
             }
diff --git a/ARMeilleure/IntermediateRepresentation/Operand.cs b/ARMeilleure/IntermediateRepresentation/Operand.cs
index ff3354f2fd..74b1c71eff 100644
--- a/ARMeilleure/IntermediateRepresentation/Operand.cs
+++ b/ARMeilleure/IntermediateRepresentation/Operand.cs
@@ -146,6 +146,7 @@ namespace ARMeilleure.IntermediateRepresentation
             return BitConverter.Int64BitsToDouble((long)Value);
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal ref ulong GetValueUnsafe()
         {
             return ref _data->Value;
diff --git a/ARMeilleure/IntermediateRepresentation/Operation.cs b/ARMeilleure/IntermediateRepresentation/Operation.cs
index 08b874cf1a..c71e143c3f 100644
--- a/ARMeilleure/IntermediateRepresentation/Operation.cs
+++ b/ARMeilleure/IntermediateRepresentation/Operation.cs
@@ -53,8 +53,8 @@ namespace ARMeilleure.IntermediateRepresentation
         public int DestinationsCount => _data->DestinationsCount;
         public int SourcesCount => _data->SourcesCount;
 
-        private Span<Operand> Destinations => new(_data->Destinations, _data->DestinationsCount);
-        private Span<Operand> Sources => new(_data->Sources, _data->SourcesCount);
+        internal Span<Operand> DestinationsUnsafe => new(_data->Destinations, _data->DestinationsCount);
+        internal Span<Operand> SourcesUnsafe => new(_data->Sources, _data->SourcesCount);
 
         public PhiOperation AsPhi()
         {
@@ -65,17 +65,17 @@ namespace ARMeilleure.IntermediateRepresentation
 
         public Operand GetDestination(int index)
         {
-            return Destinations[index];
+            return DestinationsUnsafe[index];
         }
 
         public Operand GetSource(int index)
         {
-            return Sources[index];
+            return SourcesUnsafe[index];
         }
 
         public void SetDestination(int index, Operand dest)
         {
-            ref Operand curDest = ref Destinations[index];
+            ref Operand curDest = ref DestinationsUnsafe[index];
 
             RemoveAssignment(curDest);
             AddAssignment(dest);
@@ -85,7 +85,7 @@ namespace ARMeilleure.IntermediateRepresentation
 
         public void SetSource(int index, Operand src)
         {
-            ref Operand curSrc = ref Sources[index];
+            ref Operand curSrc = ref SourcesUnsafe[index];
 
             RemoveUse(curSrc);
             AddUse(src);
@@ -274,8 +274,8 @@ namespace ARMeilleure.IntermediateRepresentation
                 EnsureCapacity(ref result._data->Destinations, ref result._data->DestinationsCount, destCount);
                 EnsureCapacity(ref result._data->Sources, ref result._data->SourcesCount, srcCount);
 
-                result.Destinations.Clear();
-                result.Sources.Clear();
+                result.DestinationsUnsafe.Clear();
+                result.SourcesUnsafe.Clear();
 
                 return result;
             }