From 9878fc2d3cf4c64f56c44c2a5de013acb6bcbade Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Thu, 30 Jul 2020 11:29:28 -0300
Subject: [PATCH]  Implement inline memory load/store exclusive and ordered
 (#1413)

* Implement inline memory load/store exclusive

* Fix missing REX prefix on 8-bit CMPXCHG

* Increment PTC version due to bugfix

* Remove redundant memory checks

* Address PR feedback

* Increment PPTC version
---
 .../CodeGen/Optimizations/Optimizer.cs        |   4 +-
 ARMeilleure/CodeGen/X86/Assembler.cs          |  16 ++
 ARMeilleure/CodeGen/X86/CodeGenerator.cs      |  28 +++
 ARMeilleure/CodeGen/X86/PreAllocator.cs       |   6 +-
 ARMeilleure/CodeGen/X86/X86Instruction.cs     |   1 +
 ARMeilleure/Instructions/InstEmitMemoryEx.cs  |  15 +-
 .../Instructions/InstEmitMemoryEx32.cs        |  23 +--
 .../Instructions/InstEmitMemoryExHelper.cs    | 187 ++++++++++++-----
 .../Instructions/InstEmitMemoryHelper.cs      | 119 ++++++++++-
 ARMeilleure/Instructions/NativeInterface.cs   | 195 +-----------------
 .../IntermediateRepresentation/Instruction.cs |   2 +
 ARMeilleure/Memory/IMemoryManager.cs          |   3 +-
 ARMeilleure/Memory/InvalidAccessException.cs  |  23 +++
 ARMeilleure/Memory/MemoryManagerPal.cs        |  76 -------
 ARMeilleure/State/NativeContext.cs            |  15 ++
 ARMeilleure/Translation/Delegates.cs          |  13 +-
 ARMeilleure/Translation/EmitterContext.cs     |  10 +
 ARMeilleure/Translation/PTC/Ptc.cs            |   2 +-
 Ryujinx.Cpu/MemoryManager.cs                  |  23 +--
 19 files changed, 385 insertions(+), 376 deletions(-)
 create mode 100644 ARMeilleure/Memory/InvalidAccessException.cs
 delete mode 100644 ARMeilleure/Memory/MemoryManagerPal.cs

diff --git a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
index 8b0c75fd6b..06118bfd6d 100644
--- a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
+++ b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
@@ -138,7 +138,9 @@ namespace ARMeilleure.CodeGen.Optimizations
         {
             return (node is Operation operation) && (operation.Instruction == Instruction.Call
                 || operation.Instruction == Instruction.Tailcall
-                || operation.Instruction == Instruction.CompareAndSwap);
+                || operation.Instruction == Instruction.CompareAndSwap
+                || operation.Instruction == Instruction.CompareAndSwap16
+                || operation.Instruction == Instruction.CompareAndSwap8);
         }
 
         private static bool IsPropagableCopy(Operation operation)
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs
index d0ccd6f810..b855f1b11a 100644
--- a/ARMeilleure/CodeGen/X86/Assembler.cs
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -101,6 +101,7 @@ namespace ARMeilleure.CodeGen.X86
             Add(X86Instruction.Cmpss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3));
             Add(X86Instruction.Cmpxchg,    new InstructionInfo(0x00000fb1, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
             Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.RexW));
+            Add(X86Instruction.Cmpxchg8,   new InstructionInfo(0x00000fb0, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Reg8Src));
             Add(X86Instruction.Comisd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
             Add(X86Instruction.Comiss,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2f, InstructionFlags.Vex));
             Add(X86Instruction.Cpuid,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fa2, InstructionFlags.RegOnly));
@@ -353,6 +354,14 @@ namespace ARMeilleure.CodeGen.X86
             WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg);
         }
 
+        public void Cmpxchg16(MemoryOperand memOp, Operand src)
+        {
+            WriteByte(LockPrefix);
+            WriteByte(0x66);
+
+            WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg);
+        }
+
         public void Cmpxchg16b(MemoryOperand memOp)
         {
             WriteByte(LockPrefix);
@@ -360,6 +369,13 @@ namespace ARMeilleure.CodeGen.X86
             WriteInstruction(memOp, null, OperandType.None, X86Instruction.Cmpxchg16b);
         }
 
+        public void Cmpxchg8(MemoryOperand memOp, Operand src)
+        {
+            WriteByte(LockPrefix);
+
+            WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg8);
+        }
+
         public void Comisd(Operand src1, Operand src2)
         {
             WriteInstruction(src1, null, src2, X86Instruction.Comisd);
diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
index 5c9fcd8909..f04be52dae 100644
--- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs
+++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -39,6 +39,8 @@ namespace ARMeilleure.CodeGen.X86
             Add(Instruction.Call,                    GenerateCall);
             Add(Instruction.Clobber,                 GenerateClobber);
             Add(Instruction.CompareAndSwap,          GenerateCompareAndSwap);
+            Add(Instruction.CompareAndSwap16,        GenerateCompareAndSwap16);
+            Add(Instruction.CompareAndSwap8,         GenerateCompareAndSwap8);
             Add(Instruction.CompareEqual,            GenerateCompareEqual);
             Add(Instruction.CompareGreater,          GenerateCompareGreater);
             Add(Instruction.CompareGreaterOrEqual,   GenerateCompareGreaterOrEqual);
@@ -587,6 +589,32 @@ namespace ARMeilleure.CodeGen.X86
             }
         }
 
+        private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation)
+        {
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+            Operand src3 = operation.GetSource(2);
+
+            EnsureSameType(src2, src3);
+
+            MemoryOperand memOp = MemoryOp(src3.Type, src1);
+
+            context.Assembler.Cmpxchg16(memOp, src3);
+        }
+
+        private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation)
+        {
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+            Operand src3 = operation.GetSource(2);
+
+            EnsureSameType(src2, src3);
+
+            MemoryOperand memOp = MemoryOp(src3.Type, src1);
+
+            context.Assembler.Cmpxchg8(memOp, src3);
+        }
+
         private static void GenerateCompareEqual(CodeGenContext context, Operation operation)
         {
             GenerateCompare(context, operation, X86Condition.Equal);
diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs
index dc7f3a75a4..b76e941697 100644
--- a/ARMeilleure/CodeGen/X86/PreAllocator.cs
+++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -101,7 +101,7 @@ namespace ARMeilleure.CodeGen.X86
                             if (callConv == CallConvName.Windows)
                             {
                                 HandleTailcallWindowsAbi(block.Operations, stackAlloc, node, operation);
-                            } 
+                            }
                             else
                             {
                                 HandleTailcallSystemVAbi(block.Operations, stackAlloc, node, operation);
@@ -207,6 +207,8 @@ namespace ARMeilleure.CodeGen.X86
             switch (operation.Instruction)
             {
                 case Instruction.CompareAndSwap:
+                case Instruction.CompareAndSwap16:
+                case Instruction.CompareAndSwap8:
                 {
                     OperandType type = operation.GetSource(1).Type;
 
@@ -887,7 +889,7 @@ namespace ARMeilleure.CodeGen.X86
                     HandleConstantRegCopy(nodes, nodes.AddBefore(node, copyOp), copyOp);
 
                     sources.Add(argReg);
-                } 
+                }
                 else
                 {
                     throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs
index 9ac17e5bf3..f9b35d371d 100644
--- a/ARMeilleure/CodeGen/X86/X86Instruction.cs
+++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -30,6 +30,7 @@ namespace ARMeilleure.CodeGen.X86
         Cmpss,
         Cmpxchg,
         Cmpxchg16b,
+        Cmpxchg8,
         Comisd,
         Comiss,
         Cpuid,
diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/ARMeilleure/Instructions/InstEmitMemoryEx.cs
index 7ca019dea0..977f23d384 100644
--- a/ARMeilleure/Instructions/InstEmitMemoryEx.cs
+++ b/ARMeilleure/Instructions/InstEmitMemoryEx.cs
@@ -23,7 +23,7 @@ namespace ARMeilleure.Instructions
 
         public static void Clrex(ArmEmitterContext context)
         {
-            context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ClearExclusive)));
+            EmitClearExclusive(context);
         }
 
         public static void Dmb(ArmEmitterContext context) => EmitBarrier(context);
@@ -139,8 +139,6 @@ namespace ARMeilleure.Instructions
 
             Operand t = GetIntOrZR(context, op.Rt);
 
-            Operand s = null;
-
             if (pair)
             {
                 Debug.Assert(op.Size == 2 || op.Size == 3, "Invalid size for pairwise store.");
@@ -159,18 +157,11 @@ namespace ARMeilleure.Instructions
                     value = context.VectorInsert(value,                t2, 1);
                 }
 
-                s = EmitStoreExclusive(context, address, value, exclusive, op.Size + 1);
+                EmitStoreExclusive(context, address, value, exclusive, op.Size + 1, op.Rs, a32: false);
             }
             else
             {
-                s = EmitStoreExclusive(context, address, t, exclusive, op.Size);
-            }
-
-            if (s != null)
-            {
-                // This is only needed for exclusive stores. The function returns 0
-                // when the store is successful, and 1 otherwise.
-                SetIntOrZR(context, op.Rs, s);
+                EmitStoreExclusive(context, address, t, exclusive, op.Size, op.Rs, a32: false);
             }
         }
 
diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx32.cs b/ARMeilleure/Instructions/InstEmitMemoryEx32.cs
index e8e660ee8b..abe61cd87d 100644
--- a/ARMeilleure/Instructions/InstEmitMemoryEx32.cs
+++ b/ARMeilleure/Instructions/InstEmitMemoryEx32.cs
@@ -13,7 +13,7 @@ namespace ARMeilleure.Instructions
     {
         public static void Clrex(ArmEmitterContext context)
         {
-            context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ClearExclusive)));
+            EmitClearExclusive(context);
         }
 
         public static void Dmb(ArmEmitterContext context) => EmitBarrier(context);
@@ -198,34 +198,21 @@ namespace ARMeilleure.Instructions
                     context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
 
                     Operand leResult = context.BitwiseOr(lo, context.ShiftLeft(hi, Const(32)));
-                    Operand leS = EmitStoreExclusive(context, address, leResult, exclusive, size);
-                    if (exclusive)
-                    {
-                        SetIntA32(context, op.Rd, leS);
-                    }
+                    EmitStoreExclusive(context, address, leResult, exclusive, size, op.Rd, a32: true);
 
                     context.Branch(lblEnd);
 
                     context.MarkLabel(lblBigEndian);
 
                     Operand beResult = context.BitwiseOr(hi, context.ShiftLeft(lo, Const(32)));
-                    Operand beS = EmitStoreExclusive(context, address, beResult, exclusive, size);
-                    if (exclusive)
-                    {
-                        SetIntA32(context, op.Rd, beS);
-                    }
+                    EmitStoreExclusive(context, address, beResult, exclusive, size, op.Rd, a32: true);
 
                     context.MarkLabel(lblEnd);
                 }
                 else
                 {
-                    Operand s = EmitStoreExclusive(context, address, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt)), exclusive, size);
-                    // This is only needed for exclusive stores. The function returns 0
-                    // when the store is successful, and 1 otherwise.
-                    if (exclusive)
-                    {
-                        SetIntA32(context, op.Rd, s);
-                    }
+                    Operand value = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt));
+                    EmitStoreExclusive(context, address, value, exclusive, size, op.Rd, a32: true);
                 }
             }
         }
diff --git a/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
index 059b9b6a40..a22cd235fc 100644
--- a/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
@@ -1,87 +1,180 @@
 using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
 using ARMeilleure.Translation;
-using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
 
 namespace ARMeilleure.Instructions
 {
     static class InstEmitMemoryExHelper
     {
-        public static Operand EmitLoadExclusive(
-            ArmEmitterContext context,
-            Operand address,
-            bool exclusive,
-            int size)
-        {
-            MethodInfo info = null;
+        private const int ErgSizeLog2 = 4;
 
+        public static Operand EmitLoadExclusive(ArmEmitterContext context, Operand address, bool exclusive, int size)
+        {
             if (exclusive)
             {
-                switch (size)
+                Operand value;
+
+                if (size == 4)
                 {
-                    case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByteExclusive));      break;
-                    case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16Exclusive));    break;
-                    case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32Exclusive));    break;
-                    case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64Exclusive));    break;
-                    case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadVector128Exclusive)); break;
+                    Operand isUnalignedAddr = InstEmitMemoryHelper.EmitAddressCheck(context, address, size);
+
+                    Operand lblFastPath = Label();
+
+                    context.BranchIfFalse(lblFastPath, isUnalignedAddr);
+
+                    // The call is not expected to return (it should throw).
+                    context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+
+                    context.MarkLabel(lblFastPath);
+
+                    // Only 128-bit CAS is guaranteed to have an atomic load.
+                    Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, null, write: false);
+
+                    Operand zero = context.VectorZero();
+
+                    value = context.CompareAndSwap(physAddr, zero, zero);
                 }
+                else
+                {
+                    value = InstEmitMemoryHelper.EmitReadIntAligned(context, address, size);
+                }
+
+                Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+                Operand exAddrPtr  = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+                Operand exValuePtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveValueOffset()));
+
+                context.Store(exAddrPtr, context.BitwiseAnd(address, Const(address.Type, GetExclusiveAddressMask())));
+                context.Store(exValuePtr, value);
+
+                return value;
             }
             else
             {
-                switch (size)
-                {
-                    case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte));      break;
-                    case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16));    break;
-                    case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32));    break;
-                    case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64));    break;
-                    case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadVector128)); break;
-                }
+                return InstEmitMemoryHelper.EmitReadIntAligned(context, address, size);
             }
-
-            return context.Call(info, address);
         }
 
-        public static Operand EmitStoreExclusive(
+        public static void EmitStoreExclusive(
             ArmEmitterContext context,
             Operand address,
             Operand value,
             bool exclusive,
-            int size)
+            int size,
+            int rs,
+            bool a32)
         {
             if (size < 3)
             {
                 value = context.ConvertI64ToI32(value);
             }
 
-            MethodInfo info = null;
-
             if (exclusive)
             {
-                switch (size)
+                void SetRs(Operand value)
                 {
-                    case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByteExclusive));      break;
-                    case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16Exclusive));    break;
-                    case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32Exclusive));    break;
-                    case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64Exclusive));    break;
-                    case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteVector128Exclusive)); break;
+                    if (a32)
+                    {
+                        SetIntA32(context, rs, value);
+                    }
+                    else
+                    {
+                        SetIntOrZR(context, rs, value);
+                    }
                 }
 
-                return context.Call(info, address, value);
+                Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+                Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+                Operand exAddr = context.Load(address.Type, exAddrPtr);
+
+                // STEP 1: Check if we have exclusive access to this memory region. If not, fail and skip store. The mask constant is typed like the address (as in EmitLoadExclusive) so it matches exAddr.
+                Operand maskedAddress = context.BitwiseAnd(address, Const(address.Type, GetExclusiveAddressMask()));
+
+                Operand exFailed = context.ICompareNotEqual(exAddr, maskedAddress);
+
+                Operand lblExit = Label();
+
+                SetRs(exFailed);
+
+                context.BranchIfTrue(lblExit, exFailed);
+
+                // STEP 2: We have exclusive access, make sure that the address is valid.
+                Operand isUnalignedAddr = InstEmitMemoryHelper.EmitAddressCheck(context, address, size);
+
+                Operand lblFastPath = Label();
+
+                context.BranchIfFalse(lblFastPath, isUnalignedAddr);
+
+                // The call is not expected to return (it should throw).
+                context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+
+                // STEP 3: We have exclusive access and the address is valid, attempt the store using CAS.
+                context.MarkLabel(lblFastPath);
+
+                Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, null, write: true);
+
+                Operand exValuePtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveValueOffset()));
+                Operand exValue = size switch
+                {
+                    0 => context.Load8(exValuePtr),
+                    1 => context.Load16(exValuePtr),
+                    2 => context.Load(OperandType.I32, exValuePtr),
+                    3 => context.Load(OperandType.I64, exValuePtr),
+                    _ => context.Load(OperandType.V128, exValuePtr)
+                };
+
+                Operand currValue = size switch
+                {
+                    0 => context.CompareAndSwap8(physAddr, exValue, value),
+                    1 => context.CompareAndSwap16(physAddr, exValue, value),
+                    _ => context.CompareAndSwap(physAddr, exValue, value)
+                };
+
+                // STEP 4: Check if we succeeded by comparing expected and in-memory values.
+                Operand storeFailed;
+
+                if (size == 4)
+                {
+                    Operand currValueLow  = context.VectorExtract(OperandType.I64, currValue, 0);
+                    Operand currValueHigh = context.VectorExtract(OperandType.I64, currValue, 1);
+
+                    Operand exValueLow  = context.VectorExtract(OperandType.I64, exValue, 0);
+                    Operand exValueHigh = context.VectorExtract(OperandType.I64, exValue, 1);
+
+                    storeFailed = context.BitwiseOr(
+                        context.ICompareNotEqual(currValueLow,  exValueLow),
+                        context.ICompareNotEqual(currValueHigh, exValueHigh));
+                }
+                else
+                {
+                    storeFailed = context.ICompareNotEqual(currValue, exValue);
+                }
+
+                SetRs(storeFailed);
+
+                context.MarkLabel(lblExit);
             }
             else
             {
-                switch (size)
-                {
-                    case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte));      break;
-                    case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16));    break;
-                    case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32));    break;
-                    case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64));    break;
-                    case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteVector128)); break;
-                }
-
-                context.Call(info, address, value);
-
-                return null;
+                InstEmitMemoryHelper.EmitWriteIntAligned(context, address, value, size);
             }
         }
+
+        public static void EmitClearExclusive(ArmEmitterContext context)
+        {
+            Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+            Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+
+            // We store ULONG max to force any exclusive address checks to fail,
+            // since this value is not aligned to the ERG mask.
+            context.Store(exAddrPtr, Const(ulong.MaxValue));
+        }
+
+        private static long GetExclusiveAddressMask() => ~((4L << ErgSizeLog2) - 1);
     }
 }
diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
index 0c47be61a3..1fe82b62ce 100644
--- a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
@@ -140,7 +140,7 @@ namespace ARMeilleure.Instructions
 
             context.MarkLabel(lblFastPath);
 
-            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false);
 
             Operand value = null;
 
@@ -157,6 +157,36 @@ namespace ARMeilleure.Instructions
             context.MarkLabel(lblEnd);
         }
 
+        public static Operand EmitReadIntAligned(ArmEmitterContext context, Operand address, int size)
+        {
+            if ((uint)size > 4)
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+
+            Operand lblFastPath = Label();
+
+            context.BranchIfFalse(lblFastPath, isUnalignedAddr);
+
+            // The call is not expected to return (it should throw).
+            context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+
+            context.MarkLabel(lblFastPath);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, null, write: false);
+
+            return size switch
+            {
+                0 => context.Load8(physAddr),
+                1 => context.Load16(physAddr),
+                2 => context.Load(OperandType.I32, physAddr),
+                3 => context.Load(OperandType.I64, physAddr),
+                _ => context.Load(OperandType.V128, physAddr)
+            };
+        }
+
         private static void EmitReadVector(
             ArmEmitterContext context,
             Operand address,
@@ -181,7 +211,7 @@ namespace ARMeilleure.Instructions
 
             context.MarkLabel(lblFastPath);
 
-            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false);
 
             Operand value = null;
 
@@ -222,7 +252,7 @@ namespace ARMeilleure.Instructions
 
             context.MarkLabel(lblFastPath);
 
-            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true);
 
             Operand value = GetInt(context, rt);
 
@@ -242,6 +272,45 @@ namespace ARMeilleure.Instructions
             context.MarkLabel(lblEnd);
         }
 
+        public static void EmitWriteIntAligned(ArmEmitterContext context, Operand address, Operand value, int size)
+        {
+            if ((uint)size > 4)
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+
+            Operand lblFastPath = Label();
+
+            context.BranchIfFalse(lblFastPath, isUnalignedAddr);
+
+            // The call is not expected to return (it should throw).
+            context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+
+            context.MarkLabel(lblFastPath);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, null, write: true);
+
+            if (size < 3 && value.Type == OperandType.I64)
+            {
+                value = context.ConvertI64ToI32(value);
+            }
+
+            if (size == 0)
+            {
+                context.Store8(physAddr, value);
+            }
+            else if (size == 1)
+            {
+                context.Store16(physAddr, value);
+            }
+            else
+            {
+                context.Store(physAddr, value);
+            }
+        }
+
         private static void EmitWriteVector(
             ArmEmitterContext context,
             Operand address,
@@ -265,7 +334,7 @@ namespace ARMeilleure.Instructions
 
             context.MarkLabel(lblFastPath);
 
-            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true);
 
             Operand value = GetVec(rt);
 
@@ -281,7 +350,7 @@ namespace ARMeilleure.Instructions
             context.MarkLabel(lblEnd);
         }
 
-        private static Operand EmitAddressCheck(ArmEmitterContext context, Operand address, int size)
+        public static Operand EmitAddressCheck(ArmEmitterContext context, Operand address, int size)
         {
             ulong addressCheckMask = ~((1UL << context.Memory.AddressSpaceBits) - 1);
 
@@ -290,7 +359,7 @@ namespace ARMeilleure.Instructions
             return context.BitwiseAnd(address, Const(address.Type, (long)addressCheckMask));
         }
 
-        private static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblSlowPath)
+        public static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblSlowPath, bool write)
         {
             int ptLevelBits = context.Memory.AddressSpaceBits - 12; // 12 = Number of page bits.
             int ptLevelSize = 1 << ptLevelBits;
@@ -302,6 +371,12 @@ namespace ARMeilleure.Instructions
 
             int bit = PageBits;
 
+            // Load page table entry from the page table.
+            // This was designed to support multi-level page tables of any size, however right
+            // now we only use flat page tables (so there's only one level).
+            // The page table entry contains the host address where the page is located.
+            // Additionally, the higher 16-bits of the host address may contain extra information
+            // used for write tracking, so this must be handled here as well.
             do
             {
                 Operand addrPart = context.ShiftRightUI(address, Const(bit));
@@ -326,7 +401,37 @@ namespace ARMeilleure.Instructions
             }
             while (bit < context.Memory.AddressSpaceBits);
 
-            context.BranchIfTrue(lblSlowPath, context.ICompareLessOrEqual(pte, Const(0L)));
+            if (lblSlowPath != null)
+            {
+                context.BranchIfTrue(lblSlowPath, context.ICompareLessOrEqual(pte, Const(0L)));
+            }
+            else
+            {
+                // When no label is provided to jump to a slow path if the address is invalid,
+                // we do the validation ourselves, and throw if needed.
+                if (write)
+                {
+                    Operand lblNotWatched = Label();
+
+                    // Is the page currently being monitored for modifications? If so we need to call MarkRegionAsModified.
+                    context.BranchIfTrue(lblNotWatched, context.ICompareGreaterOrEqual(pte, Const(0L)));
+
+                    // Mark the region as modified. Size here doesn't matter as address is assumed to be size aligned here.
+                    context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.MarkRegionAsModified)), address, Const(1UL));
+                    context.MarkLabel(lblNotWatched);
+                }
+
+                Operand lblNonNull = Label();
+
+                // Skip exception if the PTE address is non-null (not zero).
+                context.BranchIfTrue(lblNonNull, pte);
+
+                // The call is not expected to return (it should throw).
+                context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+                context.MarkLabel(lblNonNull);
+
+                pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL));
+            }
 
             Operand pageOffset = context.BitwiseAnd(address, Const(address.Type, PageMask));
 
diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs
index 1b2a928859..e8f0db84dd 100644
--- a/ARMeilleure/Instructions/NativeInterface.cs
+++ b/ARMeilleure/Instructions/NativeInterface.cs
@@ -3,38 +3,29 @@ using ARMeilleure.State;
 using ARMeilleure.Translation;
 using System;
 using System.Runtime.InteropServices;
-using System.Threading;
 
 namespace ARMeilleure.Instructions
 {
     static class NativeInterface
     {
-        private const int ErgSizeLog2 = 4;
-
         private class ThreadContext
         {
-            public State.ExecutionContext Context { get; }
+            public ExecutionContext Context { get; }
             public IMemoryManager Memory { get; }
             public Translator Translator { get; }
 
-            public ulong ExclusiveAddress { get; set; }
-            public ulong ExclusiveValueLow { get; set; }
-            public ulong ExclusiveValueHigh { get; set; }
-
-            public ThreadContext(State.ExecutionContext context, IMemoryManager memory, Translator translator)
+            public ThreadContext(ExecutionContext context, IMemoryManager memory, Translator translator)
             {
                 Context = context;
                 Memory = memory;
                 Translator = translator;
-
-                ExclusiveAddress = ulong.MaxValue;
             }
         }
 
         [ThreadStatic]
         private static ThreadContext _context;
 
-        public static void RegisterThread(State.ExecutionContext context, IMemoryManager memory, Translator translator)
+        public static void RegisterThread(ExecutionContext context, IMemoryManager memory, Translator translator)
         {
             _context = new ThreadContext(context, memory, translator);
         }
@@ -202,63 +193,6 @@ namespace ARMeilleure.Instructions
         }
         #endregion
 
-        #region "Read exclusive"
-        public static byte ReadByteExclusive(ulong address)
-        {
-            byte value = _context.Memory.Read<byte>(address);
-
-            _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
-            _context.ExclusiveValueLow = value;
-            _context.ExclusiveValueHigh = 0;
-
-            return value;
-        }
-
-        public static ushort ReadUInt16Exclusive(ulong address)
-        {
-            ushort value = _context.Memory.Read<ushort>(address);
-
-            _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
-            _context.ExclusiveValueLow = value;
-            _context.ExclusiveValueHigh = 0;
-
-            return value;
-        }
-
-        public static uint ReadUInt32Exclusive(ulong address)
-        {
-            uint value = _context.Memory.Read<uint>(address);
-
-            _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
-            _context.ExclusiveValueLow = value;
-            _context.ExclusiveValueHigh = 0;
-
-            return value;
-        }
-
-        public static ulong ReadUInt64Exclusive(ulong address)
-        {
-            ulong value = _context.Memory.Read<ulong>(address);
-
-            _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
-            _context.ExclusiveValueLow = value;
-            _context.ExclusiveValueHigh = 0;
-
-            return value;
-        }
-
-        public static V128 ReadVector128Exclusive(ulong address)
-        {
-            V128 value = MemoryManagerPal.AtomicLoad128(ref _context.Memory.GetRef<V128>(address));
-
-            _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
-            _context.ExclusiveValueLow = value.Extract<ulong>(0);
-            _context.ExclusiveValueHigh = value.Extract<ulong>(1);
-
-            return value;
-        }
-        #endregion
-
         #region "Write"
         public static void WriteByte(ulong address, byte value)
         {
@@ -286,122 +220,14 @@ namespace ARMeilleure.Instructions
         }
         #endregion
 
-        #region "Write exclusive"
-        public static int WriteByteExclusive(ulong address, byte value)
+        public static void MarkRegionAsModified(ulong address, ulong size)
         {
-            bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
-
-            if (success)
-            {
-                ref int valueRef = ref _context.Memory.GetRefNoChecks<int>(address);
-
-                int currentValue = valueRef;
-
-                byte expected = (byte)_context.ExclusiveValueLow;
-
-                int expected32 = (currentValue & ~byte.MaxValue) | expected;
-                int desired32 = (currentValue & ~byte.MaxValue) | value;
-
-                success = Interlocked.CompareExchange(ref valueRef, desired32, expected32) == expected32;
-
-                if (success)
-                {
-                    ClearExclusive();
-                }
-            }
-
-            return success ? 0 : 1;
+            GetMemoryManager().MarkRegionAsModified(address, size);
         }
 
-        public static int WriteUInt16Exclusive(ulong address, ushort value)
+        public static void ThrowInvalidMemoryAccess(ulong address)
         {
-            bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
-
-            if (success)
-            {
-                ref int valueRef = ref _context.Memory.GetRefNoChecks<int>(address);
-
-                int currentValue = valueRef;
-
-                ushort expected = (ushort)_context.ExclusiveValueLow;
-
-                int expected32 = (currentValue & ~ushort.MaxValue) | expected;
-                int desired32 = (currentValue & ~ushort.MaxValue) | value;
-
-                success = Interlocked.CompareExchange(ref valueRef, desired32, expected32) == expected32;
-
-                if (success)
-                {
-                    ClearExclusive();
-                }
-            }
-
-            return success ? 0 : 1;
-        }
-
-        public static int WriteUInt32Exclusive(ulong address, uint value)
-        {
-            bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
-
-            if (success)
-            {
-                ref int valueRef = ref _context.Memory.GetRef<int>(address);
-
-                success = Interlocked.CompareExchange(ref valueRef, (int)value, (int)_context.ExclusiveValueLow) == (int)_context.ExclusiveValueLow;
-
-                if (success)
-                {
-                    ClearExclusive();
-                }
-            }
-
-            return success ? 0 : 1;
-        }
-
-        public static int WriteUInt64Exclusive(ulong address, ulong value)
-        {
-            bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
-
-            if (success)
-            {
-                ref long valueRef = ref _context.Memory.GetRef<long>(address);
-
-                success = Interlocked.CompareExchange(ref valueRef, (long)value, (long)_context.ExclusiveValueLow) == (long)_context.ExclusiveValueLow;
-
-                if (success)
-                {
-                    ClearExclusive();
-                }
-            }
-
-            return success ? 0 : 1;
-        }
-
-        public static int WriteVector128Exclusive(ulong address, V128 value)
-        {
-            bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
-
-            if (success)
-            {
-                V128 expected = new V128(_context.ExclusiveValueLow, _context.ExclusiveValueHigh);
-
-                ref V128 location = ref _context.Memory.GetRef<V128>(address);
-
-                success = MemoryManagerPal.CompareAndSwap128(ref location, expected, value) == expected;
-
-                if (success)
-                {
-                    ClearExclusive();
-                }
-            }
-
-            return success ? 0 : 1;
-        }
-        #endregion
-
-        private static ulong GetMaskedExclusiveAddress(ulong address)
-        {
-            return address & ~((4UL << ErgSizeLog2) - 1);
+            throw new InvalidAccessException(address);
         }
 
         public static ulong GetFunctionAddress(ulong address)
@@ -426,11 +252,6 @@ namespace ARMeilleure.Instructions
             return ptr;
         }
 
-        public static void ClearExclusive()
-        {
-            _context.ExclusiveAddress = ulong.MaxValue;
-        }
-
         public static bool CheckSynchronization()
         {
             Statistics.PauseTimer();
@@ -444,7 +265,7 @@ namespace ARMeilleure.Instructions
             return context.Running;
         }
 
-        public static State.ExecutionContext GetContext()
+        public static ExecutionContext GetContext()
         {
             return _context.Context;
         }
diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs
index dfd02c22af..8ffaf3dc91 100644
--- a/ARMeilleure/IntermediateRepresentation/Instruction.cs
+++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs
@@ -13,6 +13,8 @@ namespace ARMeilleure.IntermediateRepresentation
         ByteSwap,
         Call,
         CompareAndSwap,
+        CompareAndSwap16,
+        CompareAndSwap8,
         CompareEqual,
         CompareGreater,
         CompareGreaterOrEqual,
diff --git a/ARMeilleure/Memory/IMemoryManager.cs b/ARMeilleure/Memory/IMemoryManager.cs
index ce1f58488c..f4e268860b 100644
--- a/ARMeilleure/Memory/IMemoryManager.cs
+++ b/ARMeilleure/Memory/IMemoryManager.cs
@@ -12,8 +12,9 @@ namespace ARMeilleure.Memory
         void Write<T>(ulong va, T value) where T : unmanaged;
 
         ref T GetRef<T>(ulong va) where T : unmanaged;
-        ref T GetRefNoChecks<T>(ulong va) where T : unmanaged;
 
         bool IsMapped(ulong va);
+
+        void MarkRegionAsModified(ulong va, ulong size);
     }
 }
\ No newline at end of file
diff --git a/ARMeilleure/Memory/InvalidAccessException.cs b/ARMeilleure/Memory/InvalidAccessException.cs
new file mode 100644
index 0000000000..ad54071957
--- /dev/null
+++ b/ARMeilleure/Memory/InvalidAccessException.cs
@@ -0,0 +1,23 @@
+using System;
+
+namespace ARMeilleure.Memory
+{
+    class InvalidAccessException : Exception
+    {
+        public InvalidAccessException()
+        {
+        }
+
+        public InvalidAccessException(ulong address) : base($"Invalid memory access at virtual address 0x{address:X16}.")
+        {
+        }
+
+        public InvalidAccessException(string message) : base(message)
+        {
+        }
+
+        public InvalidAccessException(string message, Exception innerException) : base(message, innerException)
+        {
+        }
+    }
+}
diff --git a/ARMeilleure/Memory/MemoryManagerPal.cs b/ARMeilleure/Memory/MemoryManagerPal.cs
deleted file mode 100644
index 0dc83959f0..0000000000
--- a/ARMeilleure/Memory/MemoryManagerPal.cs
+++ /dev/null
@@ -1,76 +0,0 @@
-using ARMeilleure.IntermediateRepresentation;
-using ARMeilleure.State;
-using ARMeilleure.Translation;
-
-namespace ARMeilleure.Memory
-{
-    static class MemoryManagerPal
-    {
-        private delegate V128 CompareExchange128(ref V128 location, V128 expected, V128 desired);
-
-        private static CompareExchange128 _compareExchange128;
-
-        private static object _lock;
-
-        static MemoryManagerPal()
-        {
-            _lock = new object();
-        }
-
-        public static V128 AtomicLoad128(ref V128 location)
-        {
-            return GetCompareAndSwap128()(ref location, V128.Zero, V128.Zero);
-        }
-
-        public static V128 CompareAndSwap128(ref V128 location, V128 expected, V128 desired)
-        {
-            return GetCompareAndSwap128()(ref location, expected, desired);
-        }
-
-        private static CompareExchange128 GetCompareAndSwap128()
-        {
-            if (_compareExchange128 == null)
-            {
-                GenerateCompareAndSwap128();
-            }
-
-            return _compareExchange128;
-        }
-
-        private static void GenerateCompareAndSwap128()
-        {
-            lock (_lock)
-            {
-                if (_compareExchange128 != null)
-                {
-                    return;
-                }
-
-                EmitterContext context = new EmitterContext();
-
-                Operand address  = context.LoadArgument(OperandType.I64,  0);
-                Operand expected = context.LoadArgument(OperandType.V128, 1);
-                Operand desired  = context.LoadArgument(OperandType.V128, 2);
-
-                Operand result = context.CompareAndSwap(address, expected, desired);
-
-                context.Return(result);
-
-                ControlFlowGraph cfg = context.GetControlFlowGraph();
-
-                OperandType[] argTypes = new OperandType[]
-                {
-                    OperandType.I64,
-                    OperandType.V128,
-                    OperandType.V128
-                };
-
-                _compareExchange128 = Compiler.Compile<CompareExchange128>(
-                    cfg,
-                    argTypes,
-                    OperandType.V128,
-                    CompilerOptions.HighCq);
-            }
-        }
-    }
-}
\ No newline at end of file
diff --git a/ARMeilleure/State/NativeContext.cs b/ARMeilleure/State/NativeContext.cs
index 9df69a4ec6..09ec6cdede 100644
--- a/ARMeilleure/State/NativeContext.cs
+++ b/ARMeilleure/State/NativeContext.cs
@@ -15,6 +15,9 @@ namespace ARMeilleure.State
             public fixed uint FpFlags[RegisterConsts.FpFlagsCount];
             public int Counter;
             public ulong CallAddress;
+            public ulong ExclusiveAddress;
+            public ulong ExclusiveValueLow;
+            public ulong ExclusiveValueHigh;
         }
 
         private static NativeCtxStorage _dummyStorage = new NativeCtxStorage();
@@ -26,6 +29,8 @@ namespace ARMeilleure.State
         public NativeContext(IJitMemoryAllocator allocator)
         {
             _block = allocator.Allocate((ulong)Unsafe.SizeOf<NativeCtxStorage>());
+
+            GetStorage().ExclusiveAddress = ulong.MaxValue;
         }
 
         public unsafe ulong GetX(int index)
@@ -162,6 +167,16 @@ namespace ARMeilleure.State
             return StorageOffset(ref _dummyStorage, ref _dummyStorage.CallAddress);
         }
 
+        public static int GetExclusiveAddressOffset()
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.ExclusiveAddress);
+        }
+
+        public static int GetExclusiveValueOffset()
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.ExclusiveValueLow);
+        }
+
         private static int StorageOffset<T>(ref NativeCtxStorage storage, ref T target)
         {
             return (int)Unsafe.ByteOffset(ref Unsafe.As<NativeCtxStorage, T>(ref storage), ref target);
diff --git a/ARMeilleure/Translation/Delegates.cs b/ARMeilleure/Translation/Delegates.cs
index addb15f6fb..7c1951ed10 100644
--- a/ARMeilleure/Translation/Delegates.cs
+++ b/ARMeilleure/Translation/Delegates.cs
@@ -103,7 +103,6 @@ namespace ARMeilleure.Translation
 
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.Break)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.CheckSynchronization)));
-            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ClearExclusive)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntfrqEl0)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntvctEl0)));
@@ -118,33 +117,25 @@ namespace ARMeilleure.Translation
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetTpidr32))); // A32 only.
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetTpidrEl0)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetTpidrEl032))); // A32 only.
+            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.MarkRegionAsModified)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte)));
-            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByteExclusive)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16)));
-            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16Exclusive)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32)));
-            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32Exclusive)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64)));
-            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64Exclusive)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadVector128)));
-            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadVector128Exclusive)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SetFpcr)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SetFpscr))); // A32 only.
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SetFpsr)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SetTpidrEl0)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SetTpidrEl032))); // A32 only.
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SupervisorCall)));
+            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.Undefined)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte)));
-            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByteExclusive)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16)));
-            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16Exclusive)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32)));
-            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32Exclusive)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64)));
-            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64Exclusive)));
             SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteVector128)));
-            SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteVector128Exclusive)));
 
             SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.BinarySignedSatQAcc)));
             SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.BinarySignedSatQAdd)));
diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs
index 74421854c8..7abab9e144 100644
--- a/ARMeilleure/Translation/EmitterContext.cs
+++ b/ARMeilleure/Translation/EmitterContext.cs
@@ -178,6 +178,16 @@ namespace ARMeilleure.Translation
             return Add(Instruction.CompareAndSwap, Local(desired.Type), address, expected, desired);
         }
 
+        public Operand CompareAndSwap16(Operand address, Operand expected, Operand desired)
+        {
+            return Add(Instruction.CompareAndSwap16, Local(OperandType.I32), address, expected, desired);
+        }
+
+        public Operand CompareAndSwap8(Operand address, Operand expected, Operand desired)
+        {
+            return Add(Instruction.CompareAndSwap8, Local(OperandType.I32), address, expected, desired);
+        }
+
         public Operand ConditionalSelect(Operand op1, Operand op2, Operand op3)
         {
             return Add(Instruction.ConditionalSelect, Local(op2.Type), op1, op2, op3);
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index 1affa4279c..154af4ebf8 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.PTC
     {
         private const string HeaderMagic = "PTChd";
 
-        private const int InternalVersion = 17; //! To be incremented manually for each change to the ARMeilleure project.
+        private const int InternalVersion = 18; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string BaseDir = "Ryujinx";
 
diff --git a/Ryujinx.Cpu/MemoryManager.cs b/Ryujinx.Cpu/MemoryManager.cs
index 3135750874..75ecca1ca7 100644
--- a/Ryujinx.Cpu/MemoryManager.cs
+++ b/Ryujinx.Cpu/MemoryManager.cs
@@ -276,14 +276,6 @@ namespace Ryujinx.Cpu
 
         private void ThrowMemoryNotContiguous() => throw new MemoryNotContiguousException();
 
-        // TODO: Remove that once we have proper 8-bits and 16-bits CAS.
-        public ref T GetRefNoChecks<T>(ulong va) where T : unmanaged
-        {
-            MarkRegionAsModified(va, (ulong)Unsafe.SizeOf<T>());
-
-            return ref _backingMemory.GetRef<T>(GetPhysicalAddressInternal(va));
-        }
-
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private bool IsContiguousAndMapped(ulong va, int size) => IsContiguous(va, size) && IsMapped(va);
 
@@ -497,7 +489,12 @@ namespace Ryujinx.Cpu
             return PteToPa(_pageTable.Read<ulong>((va / PageSize) * PteSize) & ~(0xffffUL << 48)) + (va & PageMask);
         }
 
-        private void MarkRegionAsModified(ulong va, ulong size)
+        /// <summary>
+        /// Marks a region of memory as modified by the CPU.
+        /// </summary>
+        /// <param name="va">Virtual address of the region</param>
+        /// <param name="size">Size of the region</param>
+        public void MarkRegionAsModified(ulong va, ulong size)
         {
             ulong endVa = (va + size + PageMask) & ~(ulong)PageMask;
 
@@ -532,9 +529,9 @@ namespace Ryujinx.Cpu
             return (ulong)((long)pte - _backingMemory.Pointer.ToInt64());
         }
 
-        public void Dispose()
-        {
-            _pageTable.Dispose();
-        }
+        /// <summary>
+        /// Disposes of resources used by the memory manager.
+        /// </summary>
+        public void Dispose() => _pageTable.Dispose();
     }
 }