diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7b75b1768..46675583a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -138,13 +138,13 @@ if (NOT ENABLE_GENERIC)
     if (MSVC)
         detect_architecture("_M_AMD64" x86_64)
         detect_architecture("_M_IX86" x86)
-        detect_architecture("_M_ARM" ARM)
-        detect_architecture("_M_ARM64" ARM64)
+        detect_architecture("_M_ARM" arm)
+        detect_architecture("_M_ARM64" arm64)
     else()
         detect_architecture("__x86_64__" x86_64)
         detect_architecture("__i386__" x86)
-        detect_architecture("__arm__" ARM)
-        detect_architecture("__aarch64__" ARM64)
+        detect_architecture("__arm__" arm)
+        detect_architecture("__aarch64__" arm64)
     endif()
 endif()
 if (NOT DEFINED ARCHITECTURE)
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index e4b53f706..c2385c48e 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -31,24 +31,27 @@ add_subdirectory(catch2)
 # Crypto++
 add_subdirectory(cryptopp)
 
+# fmt and Xbyak need to be added before dynarmic
+# libfmt
+add_subdirectory(fmt)
+
 # Xbyak
 if (ARCHITECTURE_x86_64)
     add_library(xbyak INTERFACE)
-    target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
+    file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/xbyak/include") # staging include root in the build tree
+    file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/xbyak/xbyak" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/xbyak/include") # copied at configure time
+    target_include_directories(xbyak SYSTEM INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/xbyak/include") # lets sources include <xbyak/xbyak.h>
     target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
 endif()
 
 # Dynarmic
-if (ARCHITECTURE_x86_64 OR ARCHITECTURE_ARM64)
+if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
     set(DYNARMIC_TESTS OFF)
     set(DYNARMIC_NO_BUNDLED_FMT ON)
     set(DYNARMIC_FRONTENDS "A32")
     add_subdirectory(dynarmic)
 endif()
 
-# libfmt
-add_subdirectory(fmt)
-
 # getopt
 if (MSVC)
     add_subdirectory(getopt)
diff --git a/externals/dynarmic b/externals/dynarmic
index 460617901..3946dcf00 160000
--- a/externals/dynarmic
+++ b/externals/dynarmic
@@ -1 +1 @@
-Subproject commit 460617901965ef7cd73cfbcf289fe367bf11c99e
+Subproject commit 3946dcf005c6e8f3d91cfb0bc5debfd0446daa39
diff --git a/externals/xbyak b/externals/xbyak
index c306b8e57..48457bfa0 160000
--- a/externals/xbyak
+++ b/externals/xbyak
@@ -1 +1 @@
-Subproject commit c306b8e5786eeeb87b8925a8af5c3bf057ff5a90
+Subproject commit 48457bfa0ded67bb4ae2d4c141c36b35469257ee
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2860a91e5..62e47e65a 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -58,6 +58,7 @@ add_library(common STATIC
     announce_multiplayer_room.h
     archives.h
     assert.h
+    atomic_ops.h
     detached_tasks.cpp
     detached_tasks.h
     bit_field.h
@@ -127,7 +128,7 @@ if(ARCHITECTURE_x86_64)
             x64/xbyak_abi.h
             x64/xbyak_util.h
     )
-elseif(ARCHITECTURE_ARM64)
+elseif(ARCHITECTURE_arm64)
     target_sources(common
         PRIVATE
             aarch64/cpu_detect.cpp
diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h
new file mode 100644
index 000000000..c18bb33c4
--- /dev/null
+++ b/src/common/atomic_ops.h
@@ -0,0 +1,166 @@
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_types.h"
+
+#if _MSC_VER
+#include <intrin.h>
+#else
+#include <cstring>
+#endif
+
+namespace Common {
+
+#if _MSC_VER
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
+    const u8 result =
+        _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
+    const u16 result =
+        _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
+    const u32 result =
+        _InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
+    const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer),
+                                                     value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
+    return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
+                                          value[0],
+                                          reinterpret_cast<__int64*>(expected.data())) != 0;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
+                                               u8& actual) {
+    actual =
+        _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
+                                               u16& actual) {
+    actual =
+        _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
+                                               u32& actual) {
+    actual =
+        _InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
+                                               u64& actual) {
+    actual = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), value,
+                                           expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
+                                               u128& actual) {
+    const bool result =
+        _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
+                                       value[0], reinterpret_cast<__int64*>(expected.data())) != 0;
+    actual = expected;
+    return result;
+}
+
+[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
+    u128 result{};
+    _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1],
+                                   result[0], reinterpret_cast<__int64*>(result.data()));
+    return result;
+}
+
+#else
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
+    unsigned __int128 value_a;
+    unsigned __int128 expected_a;
+    std::memcpy(&value_a, value.data(), sizeof(u128));
+    std::memcpy(&expected_a, expected.data(), sizeof(u128));
+    return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
+                                               u8& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
+                                               u16& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
+                                               u32& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
+                                               u64& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
+                                               u128& actual) {
+    unsigned __int128 value_a;
+    unsigned __int128 expected_a;
+    unsigned __int128 actual_a;
+    std::memcpy(&value_a, value.data(), sizeof(u128));
+    std::memcpy(&expected_a, expected.data(), sizeof(u128));
+    actual_a = __sync_val_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
+    std::memcpy(actual.data(), &actual_a, sizeof(u128));
+    return actual_a == expected_a;
+}
+
+[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
+    unsigned __int128 zeros_a = 0;
+    unsigned __int128 result_a =
+        __sync_val_compare_and_swap((unsigned __int128*)pointer, zeros_a, zeros_a);
+
+    u128 result;
+    std::memcpy(result.data(), &result_a, sizeof(u128));
+    return result;
+}
+
+#endif
+
+} // namespace Common
diff --git a/src/common/common_types.h b/src/common/common_types.h
index ee18eac81..808d2c5a7 100644
--- a/src/common/common_types.h
+++ b/src/common/common_types.h
@@ -24,6 +24,7 @@
 
 #pragma once
 
+#include <array>
 #include <cstdint>
 
 #ifdef _MSC_VER
@@ -50,6 +51,9 @@ typedef double f64; ///< 64-bit floating point
 typedef u32 VAddr; ///< Represents a pointer in the userspace virtual address space.
 typedef u32 PAddr; ///< Represents a pointer in the ARM11 physical address space.
 
+using u128 = std::array<std::uint64_t, 2>;
+static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
+
 // An inheritable class to disallow the copy constructor and operator= functions
 class NonCopyable {
 protected:
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 235952a1e..2326c87c0 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -6,7 +6,7 @@
 
 #include <bitset>
 #include <initializer_list>
-#include <xbyak.h>
+#include <xbyak/xbyak.h>
 #include "common/assert.h"
 
 namespace Common::X64 {
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
index 5cc8a8c76..461ca0516 100644
--- a/src/common/x64/xbyak_util.h
+++ b/src/common/x64/xbyak_util.h
@@ -5,7 +5,7 @@
 #pragma once
 
 #include <type_traits>
-#include <xbyak.h>
+#include <xbyak/xbyak.h>
 #include "common/x64/xbyak_abi.h"
 
 namespace Common::X64 {
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 19cbf7f6f..e1738e9f6 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -12,6 +12,8 @@ add_library(core STATIC
     arm/dyncom/arm_dyncom_thumb.h
     arm/dyncom/arm_dyncom_trans.cpp
     arm/dyncom/arm_dyncom_trans.h
+    arm/exclusive_monitor.cpp
+    arm/exclusive_monitor.h
     arm/skyeye_common/arm_regformat.h
     arm/skyeye_common/armstate.cpp
     arm/skyeye_common/armstate.h
@@ -482,12 +484,14 @@ if (ENABLE_WEB_SERVICE)
     endif()
 endif()
 
-if (ARCHITECTURE_x86_64 OR ARCHITECTURE_ARM64)
+if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
     target_sources(core PRIVATE
         arm/dynarmic/arm_dynarmic.cpp
         arm/dynarmic/arm_dynarmic.h
         arm/dynarmic/arm_dynarmic_cp15.cpp
         arm/dynarmic/arm_dynarmic_cp15.h
+        arm/dynarmic/arm_exclusive_monitor.cpp
+        arm/dynarmic/arm_exclusive_monitor.h
     )
     target_link_libraries(core PRIVATE dynarmic)
 endif()
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index ccbfd13c4..22f8fe007 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -122,6 +122,9 @@ public:
      */
     virtual void InvalidateCacheRange(u32 start_address, std::size_t length) = 0;
 
+    /// Clears the exclusive monitor's state.
+    virtual void ClearExclusiveState() = 0;
+
     /// Notify CPU emulation that page tables have changed
     virtual void SetPageTable(const std::shared_ptr<Memory::PageTable>& page_table) = 0;
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 3bfd27b02..71d4d2f2d 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -3,12 +3,14 @@
 // Refer to the license.txt file included.
 
 #include <cstring>
-#include <dynarmic/A32/a32.h>
-#include <dynarmic/A32/context.h>
+#include <dynarmic/interface/A32/a32.h>
+#include <dynarmic/interface/A32/context.h>
+#include <dynarmic/interface/optimization_flags.h>
 #include "common/assert.h"
 #include "common/microprofile.h"
 #include "core/arm/dynarmic/arm_dynarmic.h"
 #include "core/arm/dynarmic/arm_dynarmic_cp15.h"
+#include "core/arm/dynarmic/arm_exclusive_monitor.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/gdbstub/gdbstub.h"
@@ -100,10 +102,23 @@ public:
         memory.Write64(vaddr, value);
     }
 
+    bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override {
+        return memory.WriteExclusive8(vaddr, value, expected);
+    }
+    bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override {
+        return memory.WriteExclusive16(vaddr, value, expected);
+    }
+    bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override {
+        return memory.WriteExclusive32(vaddr, value, expected);
+    }
+    bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override {
+        return memory.WriteExclusive64(vaddr, value, expected);
+    }
+
     void InterpreterFallback(VAddr pc, std::size_t num_instructions) override {
         // Should never happen.
         UNREACHABLE_MSG("InterpeterFallback reached with pc = 0x{:08x}, code = 0x{:08x}, num = {}",
-                        pc, MemoryReadCode(pc), num_instructions);
+                        pc, MemoryReadCode(pc).value(), num_instructions);
     }
 
     void CallSVC(std::uint32_t swi) override {
@@ -114,6 +129,8 @@ public:
         switch (exception) {
         case Dynarmic::A32::Exception::UndefinedInstruction:
         case Dynarmic::A32::Exception::UnpredictableInstruction:
+        case Dynarmic::A32::Exception::DecodeError:
+        case Dynarmic::A32::Exception::NoExecuteFault:
             break;
         case Dynarmic::A32::Exception::Breakpoint:
             if (GDBStub::IsConnected()) {
@@ -130,10 +147,11 @@ public:
         case Dynarmic::A32::Exception::Yield:
         case Dynarmic::A32::Exception::PreloadData:
         case Dynarmic::A32::Exception::PreloadDataWithIntentToWrite:
+        case Dynarmic::A32::Exception::PreloadInstruction:
             return;
         }
         ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", exception,
-                   pc, MemoryReadCode(pc));
+                   pc, MemoryReadCode(pc).value());
     }
 
     void AddTicks(std::uint64_t ticks) override {
@@ -149,10 +167,12 @@ public:
     Memory::MemorySystem& memory;
 };
 
-ARM_Dynarmic::ARM_Dynarmic(Core::System* system, Memory::MemorySystem& memory, u32 id,
-                           std::shared_ptr<Core::Timing::Timer> timer)
-    : ARM_Interface(id, timer), system(*system), memory(memory),
-      cb(std::make_unique<DynarmicUserCallbacks>(*this)) {
+ARM_Dynarmic::ARM_Dynarmic(Core::System* system_, Memory::MemorySystem& memory_, u32 core_id_,
+                           std::shared_ptr<Core::Timing::Timer> timer_,
+                           Core::ExclusiveMonitor& exclusive_monitor_)
+    : ARM_Interface(core_id_, timer_), system(*system_), memory(memory_),
+      cb(std::make_unique<DynarmicUserCallbacks>(*this)),
+      exclusive_monitor{dynamic_cast<Core::DynarmicExclusiveMonitor&>(exclusive_monitor_)} {
     SetPageTable(memory.GetCurrentPageTable());
 }
 
@@ -208,6 +228,7 @@ u32 ARM_Dynarmic::GetVFPSystemReg(VFPSystemRegister reg) const {
     default:
         UNREACHABLE_MSG("Unknown VFP system register: {}", reg);
     }
+    return UINT_MAX;
 }
 
 void ARM_Dynarmic::SetVFPSystemReg(VFPSystemRegister reg, u32 value) {
@@ -291,6 +312,10 @@ void ARM_Dynarmic::InvalidateCacheRange(u32 start_address, std::size_t length) {
     jit->InvalidateCacheRange(start_address, length);
 }
 
+void ARM_Dynarmic::ClearExclusiveState() {
+    jit->ClearExclusiveState();
+}
+
 std::shared_ptr<Memory::PageTable> ARM_Dynarmic::GetPageTable() const {
     return current_page_table;
 }
@@ -328,6 +353,11 @@ std::unique_ptr<Dynarmic::A32::Jit> ARM_Dynarmic::MakeJit() {
     config.page_table = &current_page_table->GetPointerArray();
     config.coprocessors[15] = std::make_shared<DynarmicCP15>(cp15_state);
     config.define_unpredictable_behaviour = true;
+
+    // Multi-processor state
+    config.processor_id = GetID();
+    config.global_monitor = &exclusive_monitor.monitor;
+
     return std::make_unique<Dynarmic::A32::Jit>(config);
 }
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 7575e9a2a..45d7a4ac6 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -6,7 +6,7 @@
 
 #include <map>
 #include <memory>
-#include <dynarmic/A32/a32.h>
+#include <dynarmic/interface/A32/a32.h>
 #include "common/common_types.h"
 #include "core/arm/arm_interface.h"
 #include "core/arm/dynarmic/arm_dynarmic_cp15.h"
@@ -17,15 +17,18 @@ class MemorySystem;
 } // namespace Memory
 
 namespace Core {
+class DynarmicExclusiveMonitor;
+class ExclusiveMonitor;
 class System;
-}
+} // namespace Core
 
 class DynarmicUserCallbacks;
 
 class ARM_Dynarmic final : public ARM_Interface {
 public:
-    ARM_Dynarmic(Core::System* system, Memory::MemorySystem& memory, u32 id,
-                 std::shared_ptr<Core::Timing::Timer> timer);
+    explicit ARM_Dynarmic(Core::System* system_, Memory::MemorySystem& memory_, u32 core_id_,
+                          std::shared_ptr<Core::Timing::Timer> timer_,
+                          Core::ExclusiveMonitor& exclusive_monitor_);
     ~ARM_Dynarmic() override;
 
     void Run() override;
@@ -52,6 +55,7 @@ public:
 
     void ClearInstructionCache() override;
     void InvalidateCacheRange(u32 start_address, std::size_t length) override;
+    void ClearExclusiveState() override;
     void SetPageTable(const std::shared_ptr<Memory::PageTable>& page_table) override;
     void PurgeState() override;
 
@@ -69,6 +73,7 @@ private:
 
     u32 fpexc = 0;
     CP15State cp15_state;
+    Core::DynarmicExclusiveMonitor& exclusive_monitor;
 
     Dynarmic::A32::Jit* jit = nullptr;
     std::shared_ptr<Memory::PageTable> current_page_table = nullptr;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
index 30ab08ac8..9ebe9c644 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
@@ -5,7 +5,7 @@
 #pragma once
 
 #include <memory>
-#include <dynarmic/A32/coprocessor.h>
+#include <dynarmic/interface/A32/coprocessor.h>
 #include "common/common_types.h"
 
 struct CP15State {
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
new file mode 100644
index 000000000..b8ac82582
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
@@ -0,0 +1,59 @@
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "core/arm/dynarmic/arm_exclusive_monitor.h"
+#include "core/memory.h"
+
+namespace Core {
+
+DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::MemorySystem& memory_,
+                                                   std::size_t core_count_)
+    : monitor{core_count_}, memory{memory_} {}
+
+DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
+
+u8 DynarmicExclusiveMonitor::ExclusiveRead8(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u8>(core_index, addr, [&]() -> u8 { return memory.Read8(addr); });
+}
+
+u16 DynarmicExclusiveMonitor::ExclusiveRead16(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u16>(core_index, addr, [&]() -> u16 { return memory.Read16(addr); });
+}
+
+u32 DynarmicExclusiveMonitor::ExclusiveRead32(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u32>(core_index, addr, [&]() -> u32 { return memory.Read32(addr); });
+}
+
+u64 DynarmicExclusiveMonitor::ExclusiveRead64(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u64>(core_index, addr, [&]() -> u64 { return memory.Read64(addr); });
+}
+
+void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) {
+    monitor.ClearProcessor(core_index);
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
+    return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [&](u8 expected) -> bool {
+        return memory.WriteExclusive8(vaddr, value, expected);
+    });
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
+    return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [&](u16 expected) -> bool {
+        return memory.WriteExclusive16(vaddr, value, expected);
+    });
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
+    return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [&](u32 expected) -> bool {
+        return memory.WriteExclusive32(vaddr, value, expected);
+    });
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
+    return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [&](u64 expected) -> bool {
+        return memory.WriteExclusive64(vaddr, value, expected);
+    });
+}
+
+} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h
new file mode 100644
index 000000000..05f9f391f
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h
@@ -0,0 +1,40 @@
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <dynarmic/interface/exclusive_monitor.h>
+
+#include "common/common_types.h"
+#include "core/arm/dynarmic/arm_dynarmic.h"
+#include "core/arm/exclusive_monitor.h"
+
+namespace Memory {
+class MemorySystem;
+}
+
+namespace Core {
+
+class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
+public:
+    explicit DynarmicExclusiveMonitor(Memory::MemorySystem& memory_, std::size_t core_count_);
+    ~DynarmicExclusiveMonitor() override;
+
+    u8 ExclusiveRead8(std::size_t core_index, VAddr addr) override;
+    u16 ExclusiveRead16(std::size_t core_index, VAddr addr) override;
+    u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
+    u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
+    void ClearExclusive(std::size_t core_index) override;
+
+    bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
+    bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
+    bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override;
+    bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override;
+
+private:
+    friend class ::ARM_Dynarmic;
+    Dynarmic::ExclusiveMonitor monitor;
+    Memory::MemorySystem& memory;
+};
+
+} // namespace Core
diff --git a/src/core/arm/dyncom/arm_dyncom.h b/src/core/arm/dyncom/arm_dyncom.h
index 1452216c2..75f25bebf 100644
--- a/src/core/arm/dyncom/arm_dyncom.h
+++ b/src/core/arm/dyncom/arm_dyncom.h
@@ -30,6 +30,7 @@ public:
 
     void ClearInstructionCache() override;
     void InvalidateCacheRange(u32 start_address, std::size_t length) override;
+    void ClearExclusiveState() override {} // No-op in this implementation
 
     void SetPC(u32 pc) override;
     u32 GetPC() const override;
diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp
new file mode 100644
index 000000000..5c8a3bc20
--- /dev/null
+++ b/src/core/arm/exclusive_monitor.cpp
@@ -0,0 +1,26 @@
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
+#include "core/arm/dynarmic/arm_exclusive_monitor.h"
+#endif
+#include "core/arm/exclusive_monitor.h"
+#include "core/memory.h"
+#include "core/settings.h"
+
+namespace Core {
+
+ExclusiveMonitor::~ExclusiveMonitor() = default;
+
+std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::MemorySystem& memory,
+                                                             std::size_t num_cores) {
+#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
+    if (Settings::values.use_cpu_jit) {
+        return std::make_unique<Core::DynarmicExclusiveMonitor>(memory, num_cores);
+    }
+#endif
+    // TODO(merry): Passthrough exclusive monitor
+    return nullptr;
+}
+
+} // namespace Core
diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h
new file mode 100644
index 000000000..c91a40406
--- /dev/null
+++ b/src/core/arm/exclusive_monitor.h
@@ -0,0 +1,35 @@
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+
+namespace Memory {
+class MemorySystem;
+}
+
+namespace Core {
+
+class ExclusiveMonitor {
+public:
+    virtual ~ExclusiveMonitor();
+
+    virtual u8 ExclusiveRead8(std::size_t core_index, VAddr addr) = 0;
+    virtual u16 ExclusiveRead16(std::size_t core_index, VAddr addr) = 0;
+    virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
+    virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
+    virtual void ClearExclusive(std::size_t core_index) = 0;
+
+    virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
+    virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0;
+    virtual bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) = 0;
+    virtual bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) = 0;
+};
+
+std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::MemorySystem& memory,
+                                                             std::size_t num_cores);
+
+} // namespace Core
diff --git a/src/core/core.cpp b/src/core/core.cpp
index ec29ab002..1ed72a3d9 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -13,7 +13,8 @@
 #include "common/logging/log.h"
 #include "common/texture.h"
 #include "core/arm/arm_interface.h"
-#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_ARM64)
+#include "core/arm/exclusive_monitor.h"
+#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
 #include "core/arm/dynarmic/arm_dynarmic.h"
 #endif
 #include "core/arm/dyncom/arm_dyncom.h"
@@ -364,11 +365,12 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, u32 system_mo
     kernel = std::make_unique<Kernel::KernelSystem>(
         *memory, *timing, [this] { PrepareReschedule(); }, system_mode, num_cores, n3ds_mode);
 
+    exclusive_monitor = MakeExclusiveMonitor(*memory, num_cores);
     if (Settings::values.use_cpu_jit) {
-#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_ARM64)
+#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
         for (u32 i = 0; i < num_cores; ++i) {
-            cpu_cores.push_back(
-                std::make_shared<ARM_Dynarmic>(this, *memory, i, timing->GetTimer(i)));
+            cpu_cores.push_back(std::make_shared<ARM_Dynarmic>(
+                this, *memory, i, timing->GetTimer(i), *exclusive_monitor));
         }
 #else
         for (u32 i = 0; i < num_cores; ++i) {
@@ -543,6 +545,7 @@ void System::Shutdown(bool is_deserializing) {
     dsp_core.reset();
     kernel.reset();
     cpu_cores.clear();
+    exclusive_monitor.reset();
     timing.reset();
 
     if (video_dumper && video_dumper->IsDumping()) {
diff --git a/src/core/core.h b/src/core/core.h
index 47edb49f3..1ea2d9b55 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -61,6 +61,7 @@ class RendererBase;
 
 namespace Core {
 
+class ExclusiveMonitor;
 class Timing;
 
 class System {
@@ -363,6 +364,8 @@ private:
     std::unique_ptr<Kernel::KernelSystem> kernel;
     std::unique_ptr<Timing> timing;
 
+    std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
+
 private:
     static System s_instance;
 
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 178e5383a..5f4963485 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -9,6 +9,7 @@
 #include "audio_core/dsp_interface.h"
 #include "common/archives.h"
 #include "common/assert.h"
+#include "common/atomic_ops.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "common/swap.h"
@@ -373,6 +374,40 @@ void MemorySystem::Write(const VAddr vaddr, const T data) {
     }
 }
 
+template <typename T>
+bool MemorySystem::WriteExclusive(const VAddr vaddr, const T data, const T expected) {
+    u8* page_pointer = impl->current_page_table->pointers[vaddr >> PAGE_BITS];
+
+    if (page_pointer) {
+        const auto volatile_pointer =
+            reinterpret_cast<volatile T*>(&page_pointer[vaddr & PAGE_MASK]);
+        return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
+    }
+
+    PageType type = impl->current_page_table->attributes[vaddr >> PAGE_BITS];
+    switch (type) {
+    case PageType::Unmapped:
+        LOG_ERROR(HW_Memory, "unmapped Write{} 0x{:08X} @ 0x{:08X} at PC 0x{:08X}",
+                  sizeof(data) * 8, (u32)data, vaddr, Core::GetRunningCore().GetPC());
+        return true;
+    case PageType::Memory:
+        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:08X}", vaddr);
+        return true;
+    case PageType::RasterizerCachedMemory: {
+        RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
+        const auto volatile_pointer =
+            reinterpret_cast<volatile T*>(GetPointerForRasterizerCache(vaddr).GetPtr());
+        return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
+    }
+    case PageType::Special:
+        WriteMMIO<T>(GetMMIOHandler(*impl->current_page_table, vaddr), vaddr, data);
+        return false;
+    default:
+        UNREACHABLE();
+    }
+    return true;
+}
+
 bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
     auto& page_table = *process.vm_manager.page_table;
 
@@ -732,6 +767,22 @@ void MemorySystem::Write64(const VAddr addr, const u64 data) {
     Write<u64_le>(addr, data);
 }
 
+bool MemorySystem::WriteExclusive8(const VAddr addr, const u8 data, const u8 expected) {
+    return WriteExclusive<u8>(addr, data, expected);
+}
+
+bool MemorySystem::WriteExclusive16(const VAddr addr, const u16 data, const u16 expected) {
+    return WriteExclusive<u16_le>(addr, data, expected);
+}
+
+bool MemorySystem::WriteExclusive32(const VAddr addr, const u32 data, const u32 expected) {
+    return WriteExclusive<u32_le>(addr, data, expected);
+}
+
+bool MemorySystem::WriteExclusive64(const VAddr addr, const u64 data, const u64 expected) {
+    return WriteExclusive<u64_le>(addr, data, expected);
+}
+
 void MemorySystem::WriteBlock(const Kernel::Process& process, const VAddr dest_addr,
                               const void* src_buffer, const std::size_t size) {
     auto& page_table = *process.vm_manager.page_table;
diff --git a/src/core/memory.h b/src/core/memory.h
index 01b3777b0..6af279d94 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -327,6 +327,23 @@ public:
     void Write32(VAddr addr, u32 data);
     void Write64(VAddr addr, u64 data);
 
+    /**
+     * Writes a {8, 16, 32, 64}-bit unsigned integer to the given virtual address in
+     * the current process' address space if and only if the address contains
+     * the expected value. This operation is atomic.
+     *
+     * @param addr The virtual address to write the X-bit unsigned integer to.
+     * @param data The X-bit unsigned integer to write to the given virtual address.
+     * @param expected The X-bit unsigned integer to check against the given virtual address.
+     * @returns true if the address held the expected value and the data was written
+     *
+     * @post On success, the memory range [addr, addr + sizeof(data)) contains the given data.
+     */
+    bool WriteExclusive8(const VAddr addr, const u8 data, const u8 expected);
+    bool WriteExclusive16(const VAddr addr, const u16 data, const u16 expected);
+    bool WriteExclusive32(const VAddr addr, const u32 data, const u32 expected);
+    bool WriteExclusive64(const VAddr addr, const u64 data, const u64 expected);
+
     void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
                    std::size_t size);
     void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
@@ -384,6 +401,9 @@ private:
     template <typename T>
     void Write(const VAddr vaddr, const T data);
 
+    template <typename T>
+    bool WriteExclusive(const VAddr vaddr, const T data, const T expected);
+
     /**
      * Gets the pointer for virtual memory where the page is marked as RasterizerCachedMemory.
      * This is used to access the memory where the page pointer is nullptr due to rasterizer cache.
diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp
index 11a352416..2cb0396a3 100644
--- a/src/input_common/gcadapter/gc_adapter.cpp
+++ b/src/input_common/gcadapter/gc_adapter.cpp
@@ -4,6 +4,7 @@
 
 #include <chrono>
 #include <thread>
+#include <vector>
 
 #ifdef _MSC_VER
 #pragma warning(push)
diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h
index 573bdf8d3..b6e62f01e 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.h
+++ b/src/video_core/shader/shader_jit_x64_compiler.h
@@ -11,7 +11,7 @@
 #include <utility>
 #include <vector>
 #include <nihstro/shader_bytecode.h>
-#include <xbyak.h>
+#include <xbyak/xbyak.h>
 #include "common/bit_set.h"
 #include "common/common_types.h"
 #include "video_core/shader/shader.h"