From 998be765cf7f7da5ff0c1c08de704c9012b0f49c Mon Sep 17 00:00:00 2001
From: LDj3SNuD <dvitiello@gmail.com>
Date: Tue, 16 Feb 2021 22:16:24 +0100
Subject: [PATCH] Revert #1987.

---
 .../Instructions/InstEmitMemoryExHelper.cs    |  31 ++++-
 .../Instructions/InstEmitMemoryHelper.cs      | 128 ++++++++++++------
 Ryujinx.Cpu/MemoryManager.cs                  |  13 +-
 3 files changed, 121 insertions(+), 51 deletions(-)

diff --git a/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
index 15f5e2abc9..317e4276ee 100644
--- a/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
@@ -19,8 +19,19 @@ namespace ARMeilleure.Instructions
 
                 if (size == 4)
                 {
+                    Operand isUnalignedAddr = InstEmitMemoryHelper.EmitAddressCheck(context, address, size);
+
+                    Operand lblFastPath = Label();
+
+                    context.BranchIfFalse(lblFastPath, isUnalignedAddr);
+
+                    // The call is not expected to return (it should throw).
+                    context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+
+                    context.MarkLabel(lblFastPath);
+
                     // Only 128-bit CAS is guaranteed to have a atomic load.
-                    Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, null, write: false, 4);
+                    Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, null, write: false);
 
                     Operand zero = context.VectorZero();
 
@@ -108,8 +119,20 @@ namespace ARMeilleure.Instructions
 
                 context.BranchIfTrue(lblExit, exFailed);
 
-                // STEP 2: We have exclusive access and the address is valid, attempt the store using CAS.
-                Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, null, write: true, size);
+                // STEP 2: We have exclusive access, make sure that the address is valid.
+                Operand isUnalignedAddr = InstEmitMemoryHelper.EmitAddressCheck(context, address, size);
+
+                Operand lblFastPath = Label();
+
+                context.BranchIfFalse(lblFastPath, isUnalignedAddr);
+
+                // The call is not expected to return (it should throw).
+                context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+
+                // STEP 3: We have exclusive access and the address is valid, attempt the store using CAS.
+                context.MarkLabel(lblFastPath);
+
+                Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, null, write: true);
 
                 Operand exValuePtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveValueOffset()));
                 Operand exValue = size switch
@@ -128,7 +151,7 @@ namespace ARMeilleure.Instructions
                     _ => context.CompareAndSwap(physAddr, exValue, value)
                 };
 
-                // STEP 3: Check if we succeeded by comparing expected and in-memory values.
+                // STEP 4: Check if we succeeded by comparing expected and in-memory values.
                 Operand storeFailed;
 
                 if (size == 4)
diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
index cb4fae8f9b..fd5c5bca30 100644
--- a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
@@ -127,7 +127,11 @@ namespace ARMeilleure.Instructions
             Operand lblSlowPath = Label();
             Operand lblEnd      = Label();
 
-            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+
+            context.BranchIfTrue(lblSlowPath, isUnalignedAddr);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false);
 
             Operand value = null;
 
@@ -157,7 +161,18 @@ namespace ARMeilleure.Instructions
                 throw new ArgumentOutOfRangeException(nameof(size));
             }
 
-            Operand physAddr = EmitPtPointerLoad(context, address, null, write: false, size);
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+
+            Operand lblFastPath = Label();
+
+            context.BranchIfFalse(lblFastPath, isUnalignedAddr, BasicBlockFrequency.Cold);
+
+            // The call is not expected to return (it should throw).
+            context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+
+            context.MarkLabel(lblFastPath);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, null, write: false);
 
             return size switch
             {
@@ -180,7 +195,11 @@ namespace ARMeilleure.Instructions
             Operand lblSlowPath = Label();
             Operand lblEnd      = Label();
 
-            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+
+            context.BranchIfTrue(lblSlowPath, isUnalignedAddr);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false);
 
             Operand value = null;
 
@@ -214,7 +233,11 @@ namespace ARMeilleure.Instructions
             Operand lblSlowPath = Label();
             Operand lblEnd      = Label();
 
-            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size);
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+
+            context.BranchIfTrue(lblSlowPath, isUnalignedAddr);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true);
 
             Operand value = GetInt(context, rt);
 
@@ -247,7 +270,18 @@ namespace ARMeilleure.Instructions
                 throw new ArgumentOutOfRangeException(nameof(size));
             }
 
-            Operand physAddr = EmitPtPointerLoad(context, address, null, write: true, size);
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+
+            Operand lblFastPath = Label();
+
+            context.BranchIfFalse(lblFastPath, isUnalignedAddr, BasicBlockFrequency.Cold);
+
+            // The call is not expected to return (it should throw).
+            context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+
+            context.MarkLabel(lblFastPath);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, null, write: true);
 
             if (size < 3 && value.Type == OperandType.I64)
             {
@@ -278,7 +312,11 @@ namespace ARMeilleure.Instructions
             Operand lblSlowPath = Label();
             Operand lblEnd      = Label();
 
-            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size);
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+
+            context.BranchIfTrue(lblSlowPath, isUnalignedAddr);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true);
 
             Operand value = GetVec(rt);
 
@@ -300,49 +338,61 @@ namespace ARMeilleure.Instructions
             context.MarkLabel(lblEnd);
         }
 
-        public static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblSlowPath, bool write, int size)
+        public static Operand EmitAddressCheck(ArmEmitterContext context, Operand address, int size)
+        {
+            // High mask bits: every bit position at or above context.Memory.AddressSpaceBits.
+            ulong outOfRangeBits = ulong.MaxValue << context.Memory.AddressSpaceBits;
+            // Low mask bits: the lowest `size` bits, non-zero when address is not a multiple of (1 << size).
+            ulong misalignedBits = (1u << size) - 1;
+
+            return context.BitwiseAnd(address, Const(address.Type, (long)(outOfRangeBits | misalignedBits)));
+        }
+
+        public static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblSlowPath, bool write)
+        {
+            int ptLevelBits = context.Memory.AddressSpaceBits - 12; // 12 = Number of page bits.
             int ptLevelSize = 1 << ptLevelBits;
             int ptLevelMask = ptLevelSize - 1;
 
-            Operand addrRotated = size != 0 ? context.RotateRight(address, Const(size)) : address;
-            Operand addrShifted = context.ShiftRightUI(addrRotated, Const(PageBits - size));
-
             Operand pte = Ptc.State == PtcState.Disabled
                 ? Const(context.Memory.PageTablePointer.ToInt64())
                 : Const(context.Memory.PageTablePointer.ToInt64(), true, Ptc.PageTablePointerIndex);
 
-            Operand pteOffset = context.BitwiseAnd(addrShifted, Const(addrShifted.Type, ptLevelMask));
+            int bit = PageBits;
 
-            if (pteOffset.Type == OperandType.I32)
+            // Load page table entry from the page table.
+            // This was designed to support multi-level page tables of any size, however right
+            // now we only use flat page tables (so there's only one level).
+            // The page table entry contains the host address where the page is located.
+            // Additionally, the higher 16-bits of the host address may contain extra information
+            // used for write tracking, so this must be handled here as well.
+            do
             {
-                pteOffset = context.ZeroExtend32(OperandType.I64, pteOffset);
+                Operand addrPart = context.ShiftRightUI(address, Const(bit));
+
+                bit += ptLevelBits;
+
+                if (bit < context.Memory.AddressSpaceBits)
+                {
+                    addrPart = context.BitwiseAnd(addrPart, Const(addrPart.Type, ptLevelMask));
+                }
+
+                Operand pteOffset = context.ShiftLeft(addrPart, Const(3));
+
+                if (pteOffset.Type == OperandType.I32)
+                {
+                    pteOffset = context.ZeroExtend32(OperandType.I64, pteOffset);
+                }
+
+                Operand pteAddress = context.Add(pte, pteOffset);
+
+                pte = context.Load(OperandType.I64, pteAddress);
             }
-
-            pte = context.Load(OperandType.I64, context.Add(pte, context.ShiftLeft(pteOffset, Const(3))));
-
-            if (addrShifted.Type == OperandType.I32)
-            {
-                addrShifted = context.ZeroExtend32(OperandType.I64, addrShifted);
-            }
-
-            // If the VA is out of range, or not aligned to the access size, force PTE to 0 by masking it.
-            pte = context.BitwiseAnd(pte, context.ShiftRightSI(context.Add(addrShifted, Const(-(long)ptLevelSize)), Const(63)));
+            while (bit < context.Memory.AddressSpaceBits);
 
             if (lblSlowPath != null)
             {
-                if (write)
-                {
-                    pte = context.ShiftLeft(pte, Const(1));
-                    context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual);
-                    pte = context.ShiftRightUI(pte, Const(1));
-                }
-                else
-                {
-                    context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual);
-                    pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits. (they are still used by C# memory access)
-                }
+                ulong protection = (write ? 3UL : 1UL) << 48;
+                context.BranchIfTrue(lblSlowPath, context.BitwiseAnd(pte, Const(protection)));
             }
             else
             {
@@ -351,15 +401,13 @@ namespace ARMeilleure.Instructions
 
                 Operand lblNotWatched = Label();
 
-                // Is the page currently being tracked for read/write? If so we need to call SignalMemoryTracking.
+                // Is the page currently being tracked for read/write? If so we need to call SignalMemoryTracking.
                 context.BranchIf(lblNotWatched, pte, Const(0L), Comparison.GreaterOrEqual, BasicBlockFrequency.Cold);
 
-                // Signal memory tracking. Size here doesn't matter as address is assumed to be size aligned here.
+                // Signal memory tracking for the access. Size here doesn't matter, as the address is assumed to be size-aligned.
                 context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SignalMemoryTracking)), address, Const(1UL), Const(write ? 1 : 0));
                 context.MarkLabel(lblNotWatched);
 
-                pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits. (they are still used by C# memory access)
-
                 Operand lblNonNull = Label();
 
                 // Skip exception if the PTE address is non-null (not zero).
@@ -370,6 +418,8 @@ namespace ARMeilleure.Instructions
                 context.MarkLabel(lblNonNull);
             }
 
+            pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits. (they are still used by c# memory access)
+
             Operand pageOffset = context.BitwiseAnd(address, Const(address.Type, PageMask));
 
             if (pageOffset.Type == OperandType.I32)
diff --git a/Ryujinx.Cpu/MemoryManager.cs b/Ryujinx.Cpu/MemoryManager.cs
index 8c8bd3a4c2..cef2012656 100644
--- a/Ryujinx.Cpu/MemoryManager.cs
+++ b/Ryujinx.Cpu/MemoryManager.cs
@@ -21,8 +21,6 @@ namespace Ryujinx.Cpu
 
         private const int PteSize = 8;
 
-        private const int PointerTagBit = 62;
-
         private readonly InvalidAccessHandler _invalidAccessHandler;
 
         /// <summary>
@@ -558,12 +556,11 @@ namespace Ryujinx.Cpu
             // Protection is inverted on software pages, since the default value is 0.
             protection = (~protection) & MemoryPermission.ReadAndWrite;
 
-            long tag = protection switch
+            long tag = (long)protection << 48;
+            if (tag > 0)
             {
-                MemoryPermission.None => 0L,
-                MemoryPermission.Read => 2L << PointerTagBit,
-                _ => 3L << PointerTagBit
-            };
+                tag |= long.MinValue; // If any protection is present, the whole pte is negative.
+            }
 
             ulong endVa = (va + size + PageMask) & ~(ulong)PageMask;
             long invTagMask = ~(0xffffL << 48);
@@ -631,7 +628,7 @@ namespace Ryujinx.Cpu
             // tracking using host guard pages in future, but also supporting platforms where this is not possible.
 
             // Write tag includes read protection, since we don't have any read actions that aren't performed before write too.
-            long tag = (write ? 3L : 2L) << PointerTagBit;
+            long tag = (write ? 3L : 1L) << 48;
 
             ulong endVa = (va + size + PageMask) & ~(ulong)PageMask;