From 2502f1f07f31abe30a641d651c9640f3d81c2c0f Mon Sep 17 00:00:00 2001
From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
Date: Thu, 24 Dec 2020 03:58:36 +0100
Subject: [PATCH] Free up memory allocated by Pools during any PPTC
 translations at boot time. (#1814)

* Added support for offline invalidation, via PPTC, of low cq translations replaced by high cq translations; both on a single run and between runs.

Added invalidation of .cache files in the event of reuse on a different user operating system.

Added .info and .cache files invalidation in case of a failed stream decompression.

Nits.

* InternalVersion = 1712;

* Nits.

* Address comment.

* Get rid of BinaryFormatter.

Nits.

* Move Ptc.LoadTranslations().

Nits.

* Nits.

* Fixed corner cases (in case backup copies have to be used). Added save logs.

* Non-core fixes.

* Complement to the previous commit. Added load logs. Removed BinaryFormatter leftovers.

* Add LoadTranslations log.

* Nits.

* Removed the search and management of LowCq overlapping functions.

* Final increment of .info and .cache flags.

* Nit.

* Free up memory allocated by Pools during any PPTC translations at boot time.

* Nit due to rebase.
---
 ARMeilleure/Common/BitMapPool.cs              |  5 +-
 ARMeilleure/Common/ThreadStaticPool.cs        | 25 +++++++-
 .../OperandHelper.cs                          | 10 +++-
 .../OperationHelper.cs                        |  7 ++-
 ARMeilleure/Translation/DirectCallStubs.cs    |  2 +
 ARMeilleure/Translation/PTC/Ptc.cs            | 57 ++++++++++++-------
 ARMeilleure/Translation/PTC/PtcProfiler.cs    | 12 ++--
 ARMeilleure/Translation/Translator.cs         | 12 +++-
 8 files changed, 93 insertions(+), 37 deletions(-)

diff --git a/ARMeilleure/Common/BitMapPool.cs b/ARMeilleure/Common/BitMapPool.cs
index caba231716..aac32d55fb 100644
--- a/ARMeilleure/Common/BitMapPool.cs
+++ b/ARMeilleure/Common/BitMapPool.cs
@@ -1,6 +1,4 @@
-using System;
-
-namespace ARMeilleure.Common
+namespace ARMeilleure.Common
 {
     static class BitMapPool
     {
@@ -8,6 +6,7 @@ namespace ARMeilleure.Common
         {
             BitMap result = ThreadStaticPool<BitMap>.Instance.Allocate();
             result.Reset(initialCapacity);
+
             return result;
         }
 
diff --git a/ARMeilleure/Common/ThreadStaticPool.cs b/ARMeilleure/Common/ThreadStaticPool.cs
index cf3a7bb4d4..3fce28ec0f 100644
--- a/ARMeilleure/Common/ThreadStaticPool.cs
+++ b/ARMeilleure/Common/ThreadStaticPool.cs
@@ -5,12 +5,13 @@ using System.Threading;
 
 namespace ARMeilleure.Common
 {
-    internal class ThreadStaticPool<T> where T : class, new()
+    class ThreadStaticPool<T> where T : class, new()
     {
         private const int PoolSizeIncrement = 200;
 
         [ThreadStatic]
         private static ThreadStaticPool<T> _instance;
+
         public static ThreadStaticPool<T> Instance
         {
             get
@@ -19,6 +20,7 @@ namespace ARMeilleure.Common
                 {
                     PreparePool(0); // So that we can still use a pool when blindly initializing one.
                 }
+
                 return _instance;
             }
         }
@@ -33,9 +35,10 @@ namespace ARMeilleure.Common
         public static void PreparePool(int groupId)
         {
             // Prepare the pool for this thread, ideally using an existing one from the specified group.
+
             if (_instance == null)
             {
-                Stack<ThreadStaticPool<T>> pools = GetPools(groupId);
+                var pools = GetPools(groupId);
                 lock (pools)
                 {
                     _instance = (pools.Count != 0) ? pools.Pop() : new ThreadStaticPool<T>(PoolSizeIncrement * 2);
@@ -46,15 +49,29 @@ namespace ARMeilleure.Common
         public static void ReturnPool(int groupId)
         {
             // Reset and return the pool for this thread to the specified group.
-            Stack<ThreadStaticPool<T>> pools = GetPools(groupId);
+
+            var pools = GetPools(groupId);
             lock (pools)
             {
                 _instance.Clear();
                 pools.Push(_instance);
+
                 _instance = null;
             }
         }
 
+        public static void ResetPools()
+        {
+            // Drops the static references to the pools returned by threads for each group, allowing them to be garbage collected. NOTE(review): unlike PreparePool/ReturnPool, each per-group collection is cleared here without lock (pools) - presumably only called while no thread is preparing or returning a pool; confirm.
+
+            foreach (var pools in _pools.Values)
+            {
+                pools.Clear();
+            }
+
+            _pools.Clear();
+        }
+
         private T[] _pool;
         private int _poolUsed = -1;
         private int _poolSize;
@@ -74,10 +91,12 @@ namespace ARMeilleure.Common
         public T Allocate()
         {
             int index = Interlocked.Increment(ref _poolUsed);
+
             if (index >= _poolSize)
             {
                 IncreaseSize();
             }
+
             return _pool[index];
         }
 
diff --git a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs
index c97023fce8..f7381d869f 100644
--- a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs
+++ b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs
@@ -90,10 +90,16 @@ namespace ARMeilleure.IntermediateRepresentation
             ThreadStaticPool<MemoryOperand>.PreparePool(highCq ? 1 : 0);
         }
 
-        public static void ResetOperandPool(bool highCq)
+        public static void ReturnOperandPool(bool highCq)
         {
             ThreadStaticPool<Operand>.ReturnPool(highCq ? 1 : 0);
             ThreadStaticPool<MemoryOperand>.ReturnPool(highCq ? 1 : 0);
         }
+
+        public static void ResetOperandPools()
+        {
+            ThreadStaticPool<Operand>.ResetPools();
+            ThreadStaticPool<MemoryOperand>.ResetPools();
+        }
     }
-}
\ No newline at end of file
+}
diff --git a/ARMeilleure/IntermediateRepresentation/OperationHelper.cs b/ARMeilleure/IntermediateRepresentation/OperationHelper.cs
index 20c7d4efb8..538bdac485 100644
--- a/ARMeilleure/IntermediateRepresentation/OperationHelper.cs
+++ b/ARMeilleure/IntermediateRepresentation/OperationHelper.cs
@@ -51,9 +51,14 @@ namespace ARMeilleure.IntermediateRepresentation
             ThreadStaticPool<Operation>.PreparePool(highCq ? 1 : 0);
         }
 
-        public static void ResetOperationPool(bool highCq)
+        public static void ReturnOperationPool(bool highCq)
         {
             ThreadStaticPool<Operation>.ReturnPool(highCq ? 1 : 0);
         }
+
+        public static void ResetOperationPools()
+        {
+            ThreadStaticPool<Operation>.ResetPools();
+        }
     }
 }
diff --git a/ARMeilleure/Translation/DirectCallStubs.cs b/ARMeilleure/Translation/DirectCallStubs.cs
index df7ca16e7f..57397d14b7 100644
--- a/ARMeilleure/Translation/DirectCallStubs.cs
+++ b/ARMeilleure/Translation/DirectCallStubs.cs
@@ -34,6 +34,8 @@ namespace ARMeilleure.Translation
                 _indirectCallStubPtr     = Marshal.GetFunctionPointerForDelegate<GuestFunction>(GenerateIndirectCallStub(false));
                 _indirectTailCallStubPtr = Marshal.GetFunctionPointerForDelegate<GuestFunction>(GenerateIndirectCallStub(true));
 
+                Translator.ResetPools();
+
                 _initialized = true;
             }
         }
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index b5a92b9765..344925598e 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -8,13 +8,13 @@ using Ryujinx.Common.Logging;
 using System;
 using System.Buffers.Binary;
 using System.Collections.Concurrent;
+using System.Collections.Generic;
 using System.Diagnostics;
 using System.IO;
 using System.IO.Compression;
 using System.Runtime.InteropServices;
 using System.Security.Cryptography;
 using System.Threading;
-using System.Threading.Tasks;
 
 namespace ARMeilleure.Translation.PTC
 {
@@ -664,35 +664,50 @@ namespace ARMeilleure.Translation.PTC
 
             ThreadPool.QueueUserWorkItem(TranslationLogger, profiledFuncsToTranslate.Count);
 
+            void TranslateFuncs() // Body run by each worker thread: translates queued profiled functions until the queue is empty or PTC stops being enabled.
+            {
+                while (profiledFuncsToTranslate.TryDequeue(out var item))
+                {
+                    ulong address = item.address;
+
+                    Debug.Assert(PtcProfiler.IsAddressInStaticCodeRange(address));
+
+                    TranslatedFunction func = Translator.Translate(memory, jumpTable, address, item.mode, item.highCq);
+
+                    bool isAddressUnique = funcs.TryAdd(address, func); // Publish the result; a duplicate address is only caught by the debug assert below.
+
+                    Debug.Assert(isAddressUnique, $"The address 0x{address:X16} is not unique.");
+
+                    Interlocked.Increment(ref _translateCount); // Counter shared by all workers, hence the atomic increment.
+
+                    if (State != PtcState.Enabled) // Checked after the work, so the item already dequeued is always finished before bailing out.
+                    {
+                        break;
+                    }
+                }
+            }
+
             int maxDegreeOfParallelism = (Environment.ProcessorCount * 3) / 4;
 
-            Parallel.ForEach(profiledFuncsToTranslate, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, (item, state) =>
+            List<Thread> threads = new List<Thread>();
+
+            for (int i = 0; i < Math.Max(1, maxDegreeOfParallelism); i++) // Always start at least one worker: (ProcessorCount * 3) / 4 is 0 on a single-processor host.
             {
-                ulong address = item.Key;
+                Thread thread = new Thread(TranslateFuncs);
+                thread.IsBackground = true; // Background threads do not keep the process alive on their own.
 
-                Debug.Assert(PtcProfiler.IsAddressInStaticCodeRange(address));
+                threads.Add(thread);
+            }
 
-                TranslatedFunction func = Translator.Translate(memory, jumpTable, address, item.Value.mode, item.Value.highCq);
+            threads.ForEach((thread) => thread.Start());
+            threads.ForEach((thread) => thread.Join()); // Block until every worker has finished.
 
-                bool isAddressUnique = funcs.TryAdd(address, func);
-
-                Debug.Assert(isAddressUnique, $"The address 0x{address:X16} is not unique.");
-
-                if (func.HighCq)
-                {
-                    jumpTable.RegisterFunction(address, func);
-                }
-
-                Interlocked.Increment(ref _translateCount);
-
-                if (State != PtcState.Enabled)
-                {
-                    state.Stop();
-                }
-            });
+            threads.Clear();
 
             _loggerEvent.Set();
 
+            Translator.ResetPools();
+
             PtcJumpTable.Initialize(jumpTable);
 
             PtcJumpTable.ReadJumpTable(jumpTable);
diff --git a/ARMeilleure/Translation/PTC/PtcProfiler.cs b/ARMeilleure/Translation/PTC/PtcProfiler.cs
index bc9814eccf..0def32c328 100644
--- a/ARMeilleure/Translation/PTC/PtcProfiler.cs
+++ b/ARMeilleure/Translation/PTC/PtcProfiler.cs
@@ -85,15 +85,17 @@ namespace ARMeilleure.Translation.PTC
             return address >= StaticCodeStart && address < StaticCodeStart + StaticCodeSize;
         }
 
-        internal static Dictionary<ulong, (ExecutionMode mode, bool highCq)> GetProfiledFuncsToTranslate(ConcurrentDictionary<ulong, TranslatedFunction> funcs)
+        internal static ConcurrentQueue<(ulong address, ExecutionMode mode, bool highCq)> GetProfiledFuncsToTranslate(ConcurrentDictionary<ulong, TranslatedFunction> funcs)
         {
-            var profiledFuncsToTranslate = new Dictionary<ulong, (ExecutionMode mode, bool highCq)>(ProfiledFuncs);
+            var profiledFuncsToTranslate = new ConcurrentQueue<(ulong address, ExecutionMode mode, bool highCq)>();
 
-            foreach (ulong address in profiledFuncsToTranslate.Keys)
+            foreach (var profiledFunc in ProfiledFuncs)
             {
-                if (funcs.ContainsKey(address))
+                ulong address = profiledFunc.Key;
+
+                if (!funcs.ContainsKey(address))
                 {
-                    profiledFuncsToTranslate.Remove(address);
+                    profiledFuncsToTranslate.Enqueue((address, profiledFunc.Value.mode, profiledFunc.Value.highCq));
                 }
             }
 
diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs
index d78f5e2126..612f66479c 100644
--- a/ARMeilleure/Translation/Translator.cs
+++ b/ARMeilleure/Translation/Translator.cs
@@ -148,6 +148,8 @@ namespace ARMeilleure.Translation
 
                 ClearJitCache();
 
+                ResetPools();
+
                 _jumpTable.Dispose();
                 _jumpTable = null;
             }
@@ -249,12 +251,18 @@ namespace ARMeilleure.Translation
                 }
             }
 
-            ResetOperandPool(highCq);
-            ResetOperationPool(highCq);
+            ReturnOperandPool(highCq);
+            ReturnOperationPool(highCq);
 
             return new TranslatedFunction(func, funcSize, highCq);
         }
 
+        internal static void ResetPools()
+        {
+            ResetOperandPools();
+            ResetOperationPools();
+        }
+
         private struct Range
         {
             public ulong Start { get; }