diff --git a/ChocolArm64/Decoders/Decoder.cs b/ChocolArm64/Decoders/Decoder.cs
index 6c60e1fe5e..2b1954129e 100644
--- a/ChocolArm64/Decoders/Decoder.cs
+++ b/ChocolArm64/Decoders/Decoder.cs
@@ -168,9 +168,59 @@ namespace ChocolArm64.Decoders
         {
             //Note: On ARM32, most ALU operations can write to R15 (PC),
             //so we must consider such operations as a branch in potential aswell.
-            return  opCode is IOpCodeBImm32 ||
-                    opCode is IOpCodeBReg32 ||
-                   (opCode is IOpCodeAlu32 op && op.Rd == RegisterAlias.Aarch32Pc);
+            if (opCode is IOpCode32Alu opAlu && opAlu.Rd == RegisterAlias.Aarch32Pc)
+            {
+                return true;
+            }
+
+            //Same thing for memory operations. We have the cases where PC is a target
+            //register (Rt == 15 or (mask & (1 << 15)) != 0), and cases where there is
+            //a write back to PC (wback == true && Rn == 15). However, the latter may
+            //be "undefined" depending on the CPU, so compilers should not produce that.
+            if (opCode is IOpCode32Mem || opCode is IOpCode32MemMult)
+            {
+                int rt, rn;
+
+                bool wBack, isLoad;
+
+                if (opCode is IOpCode32Mem opMem)
+                {
+                    rt     = opMem.Rt;
+                    rn     = opMem.Rn;
+                    wBack  = opMem.WBack;
+                    isLoad = opMem.IsLoad;
+
+                    //For the dual load, we also need to take into account the
+                    //case where Rt2 == 15 (PC).
+                    if (rt == 14 && opMem.Emitter == InstEmit32.Ldrd)
+                    {
+                        rt = RegisterAlias.Aarch32Pc;
+                    }
+                }
+                else if (opCode is IOpCode32MemMult opMemMult)
+                {
+                    const int pcMask = 1 << RegisterAlias.Aarch32Pc;
+
+                    rt     = (opMemMult.RegisterMask & pcMask) != 0 ? RegisterAlias.Aarch32Pc : 0;
+                    rn     =  opMemMult.Rn;
+                    wBack  =  opMemMult.PostOffset != 0;
+                    isLoad =  opMemMult.IsLoad;
+                }
+                else
+                {
+                    throw new NotImplementedException($"The type \"{opCode.GetType().Name}\" is not implemented on the decoder.");
+                }
+
+                if ((rt == RegisterAlias.Aarch32Pc && isLoad) ||
+                    (rn == RegisterAlias.Aarch32Pc && wBack))
+                {
+                    return true;
+                }
+            }
+
+            //Explicit branch instructions.
+            return opCode is IOpCode32BImm ||
+                   opCode is IOpCode32BReg;
         }
 
         private static bool IsException(OpCode64 opCode)
diff --git a/ChocolArm64/Decoders/IOpCodeAlu32.cs b/ChocolArm64/Decoders/IOpCode32Alu.cs
similarity index 76%
rename from ChocolArm64/Decoders/IOpCodeAlu32.cs
rename to ChocolArm64/Decoders/IOpCode32Alu.cs
index 9a46488653..d6f6d82a4f 100644
--- a/ChocolArm64/Decoders/IOpCodeAlu32.cs
+++ b/ChocolArm64/Decoders/IOpCode32Alu.cs
@@ -1,6 +1,6 @@
 namespace ChocolArm64.Decoders
 {
-    interface IOpCodeAlu32 : IOpCode32
+    interface IOpCode32Alu : IOpCode32
     {
         int Rd { get; }
         int Rn { get; }
diff --git a/ChocolArm64/Decoders/IOpCode32BImm.cs b/ChocolArm64/Decoders/IOpCode32BImm.cs
new file mode 100644
index 0000000000..b69c1e369f
--- /dev/null
+++ b/ChocolArm64/Decoders/IOpCode32BImm.cs
@@ -0,0 +1,4 @@
+namespace ChocolArm64.Decoders
+{
+    interface IOpCode32BImm : IOpCode32, IOpCodeBImm { } //32-bit opcode that is also an immediate branch (IOpCodeBImm); adds no members.
+}
\ No newline at end of file
diff --git a/ChocolArm64/Decoders/IOpCodeBReg32.cs b/ChocolArm64/Decoders/IOpCode32BReg.cs
similarity index 63%
rename from ChocolArm64/Decoders/IOpCodeBReg32.cs
rename to ChocolArm64/Decoders/IOpCode32BReg.cs
index fb9d94eafe..a498b02d7c 100644
--- a/ChocolArm64/Decoders/IOpCodeBReg32.cs
+++ b/ChocolArm64/Decoders/IOpCode32BReg.cs
@@ -1,6 +1,6 @@
 namespace ChocolArm64.Decoders
 {
-    interface IOpCodeBReg32 : IOpCode32
+    interface IOpCode32BReg : IOpCode32
     {
         int Rm { get; }
     }
diff --git a/ChocolArm64/Decoders/IOpCode32Mem.cs b/ChocolArm64/Decoders/IOpCode32Mem.cs
new file mode 100644
index 0000000000..8ed25add88
--- /dev/null
+++ b/ChocolArm64/Decoders/IOpCode32Mem.cs
@@ -0,0 +1,12 @@
+namespace ChocolArm64.Decoders
+{
+    interface IOpCode32Mem : IOpCode32 //Common view of the 32-bit single register load/store opcodes.
+    {
+        int Rt { get; } //Register that is loaded or stored.
+        int Rn { get; } //Base address register.
+        //True when the computed address is written back to Rn.
+        bool WBack { get; }
+        //True when the instruction reads from memory (a load).
+        bool IsLoad { get; }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Decoders/IOpCode32MemMult.cs b/ChocolArm64/Decoders/IOpCode32MemMult.cs
new file mode 100644
index 0000000000..d611c53bff
--- /dev/null
+++ b/ChocolArm64/Decoders/IOpCode32MemMult.cs
@@ -0,0 +1,13 @@
+namespace ChocolArm64.Decoders
+{
+    interface IOpCode32MemMult : IOpCode32 //Common view of the 32-bit load/store multiple opcodes.
+    {
+        int Rn { get; } //Base address register.
+        //One bit per register (bit N set means register N is transferred).
+        int RegisterMask { get; }
+        //Value added to Rn on write back; zero means that there is no write back.
+        int PostOffset { get; }
+        //True when the instruction reads from memory (a load).
+        bool IsLoad { get; }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Decoders/IOpCodeBImm32.cs b/ChocolArm64/Decoders/IOpCodeBImm32.cs
deleted file mode 100644
index cc8248f27b..0000000000
--- a/ChocolArm64/Decoders/IOpCodeBImm32.cs
+++ /dev/null
@@ -1,4 +0,0 @@
-namespace ChocolArm64.Decoders
-{
-    interface IOpCodeBImm32 : IOpCode32, IOpCodeBImm { }
-}
\ No newline at end of file
diff --git a/ChocolArm64/Decoders/OpCodeAlu32.cs b/ChocolArm64/Decoders/OpCode32Alu.cs
similarity index 78%
rename from ChocolArm64/Decoders/OpCodeAlu32.cs
rename to ChocolArm64/Decoders/OpCode32Alu.cs
index 9612d9c29d..0cf066966b 100644
--- a/ChocolArm64/Decoders/OpCodeAlu32.cs
+++ b/ChocolArm64/Decoders/OpCode32Alu.cs
@@ -2,14 +2,14 @@ using ChocolArm64.Instructions;
 
 namespace ChocolArm64.Decoders
 {
-    class OpCodeAlu32 : OpCode32, IOpCodeAlu32
+    class OpCode32Alu : OpCode32, IOpCode32Alu
     {
         public int Rd { get; private set; }
         public int Rn { get; private set; }
 
         public bool SetFlags { get; private set; }
 
-        public OpCodeAlu32(Inst inst, long position, int opCode) : base(inst, position, opCode)
+        public OpCode32Alu(Inst inst, long position, int opCode) : base(inst, position, opCode)
         {
             Rd = (opCode >> 12) & 0xf;
             Rn = (opCode >> 16) & 0xf;
diff --git a/ChocolArm64/Decoders/OpCodeAluImm32.cs b/ChocolArm64/Decoders/OpCode32AluImm.cs
similarity index 80%
rename from ChocolArm64/Decoders/OpCodeAluImm32.cs
rename to ChocolArm64/Decoders/OpCode32AluImm.cs
index 22436709c6..4302f117e4 100644
--- a/ChocolArm64/Decoders/OpCodeAluImm32.cs
+++ b/ChocolArm64/Decoders/OpCode32AluImm.cs
@@ -2,13 +2,13 @@ using ChocolArm64.Instructions;
 
 namespace ChocolArm64.Decoders
 {
-    class OpCodeAluImm32 : OpCodeAlu32
+    class OpCode32AluImm : OpCode32Alu
     {
         public int Imm { get; private set; }
 
         public bool IsRotated { get; private set; }
 
-        public OpCodeAluImm32(Inst inst, long position, int opCode) : base(inst, position, opCode)
+        public OpCode32AluImm(Inst inst, long position, int opCode) : base(inst, position, opCode)
         {
             int value = (opCode >> 0) & 0xff;
             int shift = (opCode >> 8) & 0xf;
diff --git a/ChocolArm64/Decoders/OpCodeAluRsImm32.cs b/ChocolArm64/Decoders/OpCode32AluRsImm.cs
similarity index 80%
rename from ChocolArm64/Decoders/OpCodeAluRsImm32.cs
rename to ChocolArm64/Decoders/OpCode32AluRsImm.cs
index 7b86044860..f23916be55 100644
--- a/ChocolArm64/Decoders/OpCodeAluRsImm32.cs
+++ b/ChocolArm64/Decoders/OpCode32AluRsImm.cs
@@ -2,14 +2,14 @@ using ChocolArm64.Instructions;
 
 namespace ChocolArm64.Decoders
 {
-    class OpCodeAluRsImm32 : OpCodeAlu32
+    class OpCode32AluRsImm : OpCode32Alu
     {
         public int Rm  { get; private set; }
         public int Imm { get; private set; }
 
         public ShiftType ShiftType { get; private set; }
 
-        public OpCodeAluRsImm32(Inst inst, long position, int opCode) : base(inst, position, opCode)
+        public OpCode32AluRsImm(Inst inst, long position, int opCode) : base(inst, position, opCode)
         {
             Rm  = (opCode >> 0) & 0xf;
             Imm = (opCode >> 7) & 0x1f;
diff --git a/ChocolArm64/Decoders/OpCodeBImm32.cs b/ChocolArm64/Decoders/OpCode32BImm.cs
similarity index 83%
rename from ChocolArm64/Decoders/OpCodeBImm32.cs
rename to ChocolArm64/Decoders/OpCode32BImm.cs
index 127ac17473..43f191eb4d 100644
--- a/ChocolArm64/Decoders/OpCodeBImm32.cs
+++ b/ChocolArm64/Decoders/OpCode32BImm.cs
@@ -2,11 +2,11 @@ using ChocolArm64.Instructions;
 
 namespace ChocolArm64.Decoders
 {
-    class OpCodeBImm32 : OpCode32, IOpCodeBImm32
+    class OpCode32BImm : OpCode32, IOpCode32BImm
     {
         public long Imm { get; private set; }
 
-        public OpCodeBImm32(Inst inst, long position, int opCode) : base(inst, position, opCode)
+        public OpCode32BImm(Inst inst, long position, int opCode) : base(inst, position, opCode)
         {
             uint pc = GetPc();
 
diff --git a/ChocolArm64/Decoders/OpCodeBReg32.cs b/ChocolArm64/Decoders/OpCode32BReg.cs
similarity index 64%
rename from ChocolArm64/Decoders/OpCodeBReg32.cs
rename to ChocolArm64/Decoders/OpCode32BReg.cs
index f89b1ae124..4a0fa5376b 100644
--- a/ChocolArm64/Decoders/OpCodeBReg32.cs
+++ b/ChocolArm64/Decoders/OpCode32BReg.cs
@@ -2,11 +2,11 @@ using ChocolArm64.Instructions;
 
 namespace ChocolArm64.Decoders
 {
-    class OpCodeBReg32 : OpCode32, IOpCodeBReg32
+    class OpCode32BReg : OpCode32, IOpCode32BReg
     {
         public int Rm { get; private set; }
 
-        public OpCodeBReg32(Inst inst, long position, int opCode) : base(inst, position, opCode)
+        public OpCode32BReg(Inst inst, long position, int opCode) : base(inst, position, opCode)
         {
             Rm = opCode & 0xf;
         }
diff --git a/ChocolArm64/Decoders/OpCode32Mem.cs b/ChocolArm64/Decoders/OpCode32Mem.cs
new file mode 100644
index 0000000000..ed648a5f73
--- /dev/null
+++ b/ChocolArm64/Decoders/OpCode32Mem.cs
@@ -0,0 +1,37 @@
+using ChocolArm64.Instructions;
+
+namespace ChocolArm64.Decoders
+{
+    //Decoded 32-bit load/store opcode with one base register (Rn) and one target register (Rt).
+    class OpCode32Mem : OpCode32, IOpCode32Mem
+    {
+        public int Rt  { get; private set; }
+        public int Rn  { get; private set; }
+        public int Imm { get; protected set; }
+
+        public bool Index        { get; private set; }
+        public bool Add          { get; private set; }
+        public bool WBack        { get; private set; }
+        public bool Unprivileged { get; private set; }
+        public bool IsLoad       { get; private set; }
+
+        public OpCode32Mem(Inst inst, long position, int opCode) : base(inst, position, opCode)
+        {
+            Rt = (opCode >> 12) & 0xf;
+            Rn = (opCode >> 16) & 0xf;
+
+            //Addressing mode bits of the encoding: P (24), U (23) and W (21).
+            bool preIndexed = ((opCode >> 24) & 1) != 0;
+            bool addOffset  = ((opCode >> 23) & 1) != 0;
+            bool writeBit   = ((opCode >> 21) & 1) != 0;
+
+            Index        = preIndexed;
+            Add          = addOffset;
+            WBack        = writeBit || !preIndexed;
+            Unprivileged = writeBit && !preIndexed;
+
+            //Bit 20 marks loads; opcodes using the Ldrd emitter are loads regardless of it.
+            IsLoad = ((opCode >> 20) & 1) != 0 || inst.Emitter == InstEmit32.Ldrd;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Decoders/OpCode32MemImm.cs b/ChocolArm64/Decoders/OpCode32MemImm.cs
new file mode 100644
index 0000000000..ca46e08f87
--- /dev/null
+++ b/ChocolArm64/Decoders/OpCode32MemImm.cs
@@ -0,0 +1,12 @@
+using ChocolArm64.Instructions;
+
+namespace ChocolArm64.Decoders
+{
+    class OpCode32MemImm : OpCode32Mem //Load/store opcode whose offset is a 12 bits immediate.
+    {
+        public OpCode32MemImm(Inst inst, long position, int opCode) : base(inst, position, opCode)
+        {
+            Imm = opCode & 0xfff; //The immediate is the low 12 bits of the encoding.
+        }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Decoders/OpCode32MemImm8.cs b/ChocolArm64/Decoders/OpCode32MemImm8.cs
new file mode 100644
index 0000000000..02e446e8f1
--- /dev/null
+++ b/ChocolArm64/Decoders/OpCode32MemImm8.cs
@@ -0,0 +1,15 @@
+using ChocolArm64.Instructions;
+
+namespace ChocolArm64.Decoders
+{
+    class OpCode32MemImm8 : OpCode32Mem
+    {
+        public OpCode32MemImm8(Inst inst, long position, int opCode) : base(inst, position, opCode)
+        {
+            //The 8 bits immediate is encoded as two nibbles: bits [3:0] and bits [11:8].
+            int lowNibble  = opCode & 0xf;
+            int highNibble = (opCode >> 4) & 0xf0;
+            Imm = highNibble | lowNibble;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Decoders/OpCode32MemMult.cs b/ChocolArm64/Decoders/OpCode32MemMult.cs
new file mode 100644
index 0000000000..652da8a552
--- /dev/null
+++ b/ChocolArm64/Decoders/OpCode32MemMult.cs
@@ -0,0 +1,57 @@
+using ChocolArm64.Instructions;
+
+namespace ChocolArm64.Decoders
+{
+    //Decoded 32-bit load/store multiple opcode (one base register and a register mask).
+    class OpCode32MemMult : OpCode32, IOpCode32MemMult
+    {
+        public int Rn { get; private set; }
+        public int RegisterMask { get; private set; }
+        public int Offset       { get; private set; }
+        public int PostOffset   { get; private set; }
+
+        public bool IsLoad { get; private set; }
+
+        public OpCode32MemMult(Inst inst, long position, int opCode) : base(inst, position, opCode)
+        {
+            Rn           = (opCode >> 16) & 0xf;
+            RegisterMask = opCode & 0xffff;
+            IsLoad       = ((opCode >> 20) & 1) != 0;
+
+            bool writeBack = ((opCode >> 21) & 1) != 0;
+            bool increment = ((opCode >> 23) & 1) != 0;
+            bool before    = ((opCode >> 24) & 1) != 0;
+
+            //Each bit set on the mask is one 4 bytes register to transfer.
+            int totalBytes = 0;
+            for (int bits = RegisterMask; bits != 0; bits >>= 1)
+            {
+                totalBytes += (bits & 1) * 4;
+            }
+
+            //Offset, relative to Rn, of the first word that is accessed.
+            int start = increment ? 0 : -totalBytes;
+
+            if (increment == before)
+            {
+                start += 4;
+            }
+
+            Offset = start;
+
+            //Amount added to Rn on write back, zero when write back is disabled.
+            if (!writeBack)
+            {
+                PostOffset = 0;
+            }
+            else if (increment)
+            {
+                PostOffset = totalBytes;
+            }
+            else
+            {
+                PostOffset = -totalBytes;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Decoders/OpCodeAluImm8T16.cs b/ChocolArm64/Decoders/OpCodeT16AluImm8.cs
similarity index 77%
rename from ChocolArm64/Decoders/OpCodeAluImm8T16.cs
rename to ChocolArm64/Decoders/OpCodeT16AluImm8.cs
index beb6dcaa57..52c059f40e 100644
--- a/ChocolArm64/Decoders/OpCodeAluImm8T16.cs
+++ b/ChocolArm64/Decoders/OpCodeT16AluImm8.cs
@@ -2,7 +2,7 @@ using ChocolArm64.Instructions;
 
 namespace ChocolArm64.Decoders
 {
-    class OpCodeAluImm8T16 : OpCodeT16, IOpCodeAlu32
+    class OpCodeT16AluImm8 : OpCodeT16, IOpCode32Alu
     {
         private int _rdn;
 
@@ -13,7 +13,7 @@ namespace ChocolArm64.Decoders
 
         public int Imm { get; private set; }
 
-        public OpCodeAluImm8T16(Inst inst, long position, int opCode) : base(inst, position, opCode)
+        public OpCodeT16AluImm8(Inst inst, long position, int opCode) : base(inst, position, opCode)
         {
             Imm  = (opCode >> 0) & 0xff;
             _rdn = (opCode >> 8) & 0x7;
diff --git a/ChocolArm64/Decoders/OpCodeBRegT16.cs b/ChocolArm64/Decoders/OpCodeT16BReg.cs
similarity index 65%
rename from ChocolArm64/Decoders/OpCodeBRegT16.cs
rename to ChocolArm64/Decoders/OpCodeT16BReg.cs
index c6c251308a..2951470055 100644
--- a/ChocolArm64/Decoders/OpCodeBRegT16.cs
+++ b/ChocolArm64/Decoders/OpCodeT16BReg.cs
@@ -2,11 +2,11 @@ using ChocolArm64.Instructions;
 
 namespace ChocolArm64.Decoders
 {
-    class OpCodeBRegT16 : OpCodeT16, IOpCodeBReg32
+    class OpCodeT16BReg : OpCodeT16, IOpCode32BReg
     {
         public int Rm { get; private set; }
 
-        public OpCodeBRegT16(Inst inst, long position, int opCode) : base(inst, position, opCode)
+        public OpCodeT16BReg(Inst inst, long position, int opCode) : base(inst, position, opCode)
         {
             Rm = (opCode >> 3) & 0xf;
         }
diff --git a/ChocolArm64/Instructions/InstEmit32Helper.cs b/ChocolArm64/Instructions/InstEmit32Helper.cs
index d3ff81387c..792e96f5a8 100644
--- a/ChocolArm64/Instructions/InstEmit32Helper.cs
+++ b/ChocolArm64/Instructions/InstEmit32Helper.cs
@@ -2,6 +2,7 @@ using ChocolArm64.Decoders;
 using ChocolArm64.State;
 using ChocolArm64.Translation;
 using System;
+using System.Reflection.Emit;
 
 namespace ChocolArm64.Instructions
 {
@@ -26,6 +27,51 @@ namespace ChocolArm64.Instructions
             }
         }
 
+        public static void EmitStoreToRegister(ILEmitterCtx context, int register)
+        {
+            //Any register other than PC is stored directly on its (mode dependent) alias.
+            if (register != RegisterAlias.Aarch32Pc)
+            {
+                context.EmitStint(GetRegisterAlias(context.Mode, register));
+                return;
+            }
+
+            //Writing to PC: store the state first, then emit the PC write.
+            context.EmitStoreState();
+            EmitBxWritePc(context);
+        }
+
+        public static void EmitBxWritePc(ILEmitterCtx context)
+        {
+            //Expects the branch target address on the stack. Bit 0 of the address selects
+            //the instruction set (1 = Thumb, 0 = ARM) and is copied to the T flag.
+            context.Emit(OpCodes.Dup);
+            context.EmitLdc_I4(1);
+            context.Emit(OpCodes.And);
+            context.Emit(OpCodes.Dup);
+
+            context.EmitStflg((int)PState.TBit);
+
+            ILLabel lblArmMode = new ILLabel();
+            ILLabel lblEnd     = new ILLabel();
+
+            //Bit 0 clear selects ARM mode (this test was inverted when it used Brtrue_S).
+            context.Emit(OpCodes.Brfalse_S, lblArmMode);
+
+            //Thumb mode: the target is halfword aligned, only bit 0 is cleared.
+            context.EmitLdc_I4(~1);
+            context.Emit(OpCodes.Br_S, lblEnd);
+
+            //ARM mode: the target is word aligned, bits [1:0] are cleared.
+            context.MarkLabel(lblArmMode);
+            context.EmitLdc_I4(~3);
+
+            context.MarkLabel(lblEnd);
+            context.Emit(OpCodes.And);
+            context.Emit(OpCodes.Conv_U8);
+            context.Emit(OpCodes.Ret);
+        }
+
         public static int GetRegisterAlias(Aarch32Mode mode, int register)
         {
             //Only registers >= 8 are banked, with registers in the range [8, 12] being
diff --git a/ChocolArm64/Instructions/InstEmitAlu32.cs b/ChocolArm64/Instructions/InstEmitAlu32.cs
index 2ebb807385..539e7c4366 100644
--- a/ChocolArm64/Instructions/InstEmitAlu32.cs
+++ b/ChocolArm64/Instructions/InstEmitAlu32.cs
@@ -12,7 +12,7 @@ namespace ChocolArm64.Instructions
     {
         public static void Add(ILEmitterCtx context)
         {
-            IOpCodeAlu32 op = (IOpCodeAlu32)context.CurrOp;
+            IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
 
             EmitAluLoadOpers(context, setCarry: false);
 
@@ -29,9 +29,25 @@ namespace ChocolArm64.Instructions
             EmitAluStore(context);
         }
 
+        public static void Cmp(ILEmitterCtx context)
+        {
+            IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+            //NOTE(review): 'op' is never read here; the cast only checks the opcode type.
+            EmitAluLoadOpers(context, setCarry: false);
+
+            context.Emit(OpCodes.Sub);
+
+            context.EmitZnFlagCheck();
+
+            EmitSubsCCheck(context);
+            EmitSubsVCheck(context);
+            //The difference is only used by the flag checks above, so it is discarded.
+            context.Emit(OpCodes.Pop);
+        }
+
         public static void Mov(ILEmitterCtx context)
         {
-            IOpCodeAlu32 op = (IOpCodeAlu32)context.CurrOp;
+            IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
 
             EmitAluLoadOper2(context);
 
@@ -45,7 +61,7 @@ namespace ChocolArm64.Instructions
 
         public static void Sub(ILEmitterCtx context)
         {
-            IOpCodeAlu32 op = (IOpCodeAlu32)context.CurrOp;
+            IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
 
             EmitAluLoadOpers(context, setCarry: false);
 
@@ -64,7 +80,7 @@ namespace ChocolArm64.Instructions
 
         private static void EmitAluStore(ILEmitterCtx context)
         {
-            IOpCodeAlu32 op = (IOpCodeAlu32)context.CurrOp;
+            IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
 
             if (op.Rd == RegisterAlias.Aarch32Pc)
             {
@@ -106,6 +122,8 @@ namespace ChocolArm64.Instructions
 
         private static void EmitAluWritePc(ILEmitterCtx context)
         {
+            context.EmitStoreState();
+
             if (IsThumb(context.CurrOp))
             {
                 context.EmitLdc_I4(~1);
diff --git a/ChocolArm64/Instructions/InstEmitAluHelper.cs b/ChocolArm64/Instructions/InstEmitAluHelper.cs
index db8fd0e57d..181f645ab9 100644
--- a/ChocolArm64/Instructions/InstEmitAluHelper.cs
+++ b/ChocolArm64/Instructions/InstEmitAluHelper.cs
@@ -127,7 +127,7 @@ namespace ChocolArm64.Instructions
             {
                 context.EmitLdintzr(op.Rm);
             }
-            else if (context.CurrOp is OpCodeAluRsImm32 op32)
+            else if (context.CurrOp is OpCode32AluRsImm op32)
             {
                 InstEmit32Helper.EmitLoadFromRegister(context, op32.Rm);
             }
@@ -156,7 +156,7 @@ namespace ChocolArm64.Instructions
                     context.EmitLdint(op.Rn);
                 }
             }
-            else if (context.CurrOp is IOpCodeAlu32 op32)
+            else if (context.CurrOp is IOpCode32Alu op32)
             {
                 InstEmit32Helper.EmitLoadFromRegister(context, op32.Rn);
             }
@@ -171,7 +171,7 @@ namespace ChocolArm64.Instructions
             switch (context.CurrOp)
             {
                 //ARM32.
-                case OpCodeAluImm32 op:
+                case OpCode32AluImm op:
                     context.EmitLdc_I4(op.Imm);
 
                     if (op.SetFlags && op.IsRotated)
@@ -182,11 +182,11 @@ namespace ChocolArm64.Instructions
                     }
                     break;
 
-                case OpCodeAluRsImm32 op:
+                case OpCode32AluRsImm op:
                     EmitLoadRmShiftedByImmediate(context, op, setCarry);
                     break;
 
-                case OpCodeAluImm8T16 op:
+                case OpCodeT16AluImm8 op:
                     context.EmitLdc_I4(op.Imm);
                     break;
 
@@ -246,7 +246,7 @@ namespace ChocolArm64.Instructions
         }
 
         //ARM32 helpers.
-        private static void EmitLoadRmShiftedByImmediate(ILEmitterCtx context, OpCodeAluRsImm32 op, bool setCarry)
+        private static void EmitLoadRmShiftedByImmediate(ILEmitterCtx context, OpCode32AluRsImm op, bool setCarry)
         {
             int shift = op.Imm;
 
diff --git a/ChocolArm64/Instructions/InstEmitFlow32.cs b/ChocolArm64/Instructions/InstEmitFlow32.cs
index 03b3993640..61f1d34c53 100644
--- a/ChocolArm64/Instructions/InstEmitFlow32.cs
+++ b/ChocolArm64/Instructions/InstEmitFlow32.cs
@@ -11,7 +11,7 @@ namespace ChocolArm64.Instructions
     {
         public static void B(ILEmitterCtx context)
         {
-            IOpCodeBImm32 op = (IOpCodeBImm32)context.CurrOp;
+            IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
 
             if (context.CurrBlock.Branch != null)
             {
@@ -38,7 +38,7 @@ namespace ChocolArm64.Instructions
 
         public static void Bx(ILEmitterCtx context)
         {
-            IOpCodeBReg32 op = (IOpCodeBReg32)context.CurrOp;
+            IOpCode32BReg op = (IOpCode32BReg)context.CurrOp;
 
             context.EmitStoreState();
 
@@ -49,7 +49,7 @@ namespace ChocolArm64.Instructions
 
         private static void Blx(ILEmitterCtx context, bool x)
         {
-            IOpCodeBImm32 op = (IOpCodeBImm32)context.CurrOp;
+            IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
 
             uint pc = op.GetPc();
 
@@ -78,22 +78,5 @@ namespace ChocolArm64.Instructions
 
             InstEmitFlowHelper.EmitCall(context, op.Imm);
         }
-
-        private static void EmitBxWritePc(ILEmitterCtx context)
-        {
-            context.Emit(OpCodes.Dup);
-
-            context.EmitLdc_I4(1);
-
-            context.Emit(OpCodes.And);
-
-            context.EmitStflg((int)PState.TBit);
-
-            context.EmitLdc_I4(~1);
-
-            context.Emit(OpCodes.And);
-            context.Emit(OpCodes.Conv_U8);
-            context.Emit(OpCodes.Ret);
-        }
     }
 }
\ No newline at end of file
diff --git a/ChocolArm64/Instructions/InstEmitMemory32.cs b/ChocolArm64/Instructions/InstEmitMemory32.cs
new file mode 100644
index 0000000000..4d6a57a472
--- /dev/null
+++ b/ChocolArm64/Instructions/InstEmitMemory32.cs
@@ -0,0 +1,325 @@
+using ChocolArm64.Decoders;
+using ChocolArm64.State;
+using ChocolArm64.Translation;
+using System;
+using System.Reflection.Emit;
+
+using static ChocolArm64.Instructions.InstEmit32Helper;
+using static ChocolArm64.Instructions.InstEmitMemoryHelper;
+
+namespace ChocolArm64.Instructions
+{
+    static partial class InstEmit32
+    {
+        private const int ByteSizeLog2  = 0; //Access sizes are the log2 of the size in bytes.
+        private const int HWordSizeLog2 = 1;
+        private const int WordSizeLog2  = 2;
+        private const int DWordSizeLog2 = 3;
+        //Kind of memory access requested from EmitLoadOrStore.
+        [Flags]
+        enum AccessType
+        {
+            Store  = 0,
+            Signed = 1,
+            Load   = 2,
+            //Loads combined with the extension type: zero (Zx) or sign (Sx) extended.
+            LoadZx = Load,
+            LoadSx = Load | Signed,
+        }
+
+        public static void Ldm(ILEmitterCtx context)
+        {
+            OpCode32MemMult op = (OpCode32MemMult)context.CurrOp;
+            //Loads every register on the mask from consecutive words starting at Rn + Offset.
+            EmitLoadFromRegister(context, op.Rn);
+
+            bool writesToPc = (op.RegisterMask & (1 << RegisterAlias.Aarch32Pc)) != 0;
+            //Write back is skipped when Rn is PC and PC is also on the register list.
+            bool writeBack = op.PostOffset != 0 && (op.Rn != RegisterAlias.Aarch32Pc || !writesToPc);
+            //Keep a second copy of the base address on the stack for the write back.
+            if (writeBack)
+            {
+                context.Emit(OpCodes.Dup);
+            }
+            //The temporary holds the address of the first word (Rn + Offset).
+            context.EmitLdc_I4(op.Offset);
+
+            context.Emit(OpCodes.Add);
+
+            context.EmitSttmp();
+            //The write back (Rn + PostOffset stored on Rn) is emitted before the loads.
+            if (writeBack)
+            {
+                context.EmitLdc_I4(op.PostOffset);
+
+                context.Emit(OpCodes.Add);
+
+                EmitStoreToRegister(context, op.Rn);
+            }
+
+            int mask   = op.RegisterMask;
+            int offset = 0;
+            //Walk the mask from bit 0 up; each selected register takes the next word.
+            for (int register = 0; mask != 0; mask >>= 1, register++)
+            {
+                if ((mask & 1) != 0)
+                {
+                    context.EmitLdarg(TranslatedSub.MemoryArgIdx);
+                    context.EmitLdtmp();
+
+                    context.EmitLdc_I4(offset);
+
+                    context.Emit(OpCodes.Add);
+
+                    EmitReadZxCall(context, WordSizeLog2);
+
+                    EmitStoreToRegister(context, register);
+
+                    offset += 4;
+                }
+            }
+        }
+
+        public static void Ldr(ILEmitterCtx context) //Word load, zero extended.
+        {
+            EmitLoadOrStore(context, size: WordSizeLog2, accType: AccessType.LoadZx);
+        }
+
+        public static void Ldrb(ILEmitterCtx context) //Byte load, zero extended.
+        {
+            EmitLoadOrStore(context, size: ByteSizeLog2, accType: AccessType.LoadZx);
+        }
+
+        public static void Ldrd(ILEmitterCtx context) //Double word load (Rt and Rt | 1).
+        {
+            EmitLoadOrStore(context, size: DWordSizeLog2, accType: AccessType.LoadZx);
+        }
+
+        public static void Ldrh(ILEmitterCtx context) //Half word load, zero extended.
+        {
+            EmitLoadOrStore(context, size: HWordSizeLog2, accType: AccessType.LoadZx);
+        }
+
+        public static void Ldrsb(ILEmitterCtx context) //Byte load, sign extended.
+        {
+            EmitLoadOrStore(context, size: ByteSizeLog2, accType: AccessType.LoadSx);
+        }
+
+        public static void Ldrsh(ILEmitterCtx context) //Half word load, sign extended.
+        {
+            EmitLoadOrStore(context, size: HWordSizeLog2, accType: AccessType.LoadSx);
+        }
+
+        public static void Stm(ILEmitterCtx context)
+        {
+            OpCode32MemMult op = (OpCode32MemMult)context.CurrOp;
+            //Stores every register on the mask to consecutive words starting at Rn + Offset.
+            EmitLoadFromRegister(context, op.Rn);
+
+            context.EmitLdc_I4(op.Offset);
+
+            context.Emit(OpCodes.Add);
+            //The temporary holds the address of the first word (Rn + Offset).
+            context.EmitSttmp();
+
+            int mask   = op.RegisterMask;
+            int offset = 0;
+            //Walk the mask from bit 0 up; each selected register takes the next word.
+            for (int register = 0; mask != 0; mask >>= 1, register++)
+            {
+                if ((mask & 1) != 0)
+                {
+                    context.EmitLdarg(TranslatedSub.MemoryArgIdx);
+                    context.EmitLdtmp();
+
+                    context.EmitLdc_I4(offset);
+
+                    context.Emit(OpCodes.Add);
+
+                    EmitLoadFromRegister(context, register);
+
+                    EmitWriteCall(context, WordSizeLog2);
+
+                    //Note: If Rn is also specified on the register list,
+                    //and Rn is the first register on this list, then the
+                    //value that is written to memory is the unmodified value,
+                    //before the write back. If it is on the list, but it's
+                    //not the first one, then the value written to memory
+                    //varies between CPUs.
+                    if (offset == 0 && op.PostOffset != 0)
+                    {
+                        //Emit write back after the first write.
+                        EmitLoadFromRegister(context, op.Rn);
+
+                        context.EmitLdc_I4(op.PostOffset);
+
+                        context.Emit(OpCodes.Add);
+
+                        EmitStoreToRegister(context, op.Rn);
+                    }
+
+                    offset += 4;
+                }
+            }
+        }
+
+        public static void Str(ILEmitterCtx context) //Word store.
+        {
+            EmitLoadOrStore(context, size: WordSizeLog2, accType: AccessType.Store);
+        }
+
+        public static void Strb(ILEmitterCtx context) //Byte store.
+        {
+            EmitLoadOrStore(context, size: ByteSizeLog2, accType: AccessType.Store);
+        }
+
+        public static void Strd(ILEmitterCtx context) //Double word store (Rt and Rt | 1).
+        {
+            EmitLoadOrStore(context, size: DWordSizeLog2, accType: AccessType.Store);
+        }
+
+        public static void Strh(ILEmitterCtx context) //Half word store.
+        {
+            EmitLoadOrStore(context, size: HWordSizeLog2, accType: AccessType.Store);
+        }
+
+        private static void EmitLoadOrStore(ILEmitterCtx context, int size, AccessType accType)
+        {
+            OpCode32Mem op = (OpCode32Mem)context.CurrOp;
+            //Emits a single register load or store; size is the log2 of the access size in bytes.
+            if (op.Index || op.WBack)
+            {
+                EmitLoadFromRegister(context, op.Rn);
+
+                context.EmitLdc_I4(op.Imm);
+
+                context.Emit(op.Add ? OpCodes.Add : OpCodes.Sub);
+
+                context.EmitSttmp();
+            }
+            //When computed above, the temporary holds Rn plus or minus the immediate.
+            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
+            //Indexed forms access the offset address, the others access the address on Rn.
+            if (op.Index)
+            {
+                context.EmitLdtmp();
+            }
+            else
+            {
+                EmitLoadFromRegister(context, op.Rn);
+            }
+
+            if ((accType & AccessType.Load) != 0)
+            {
+                if ((accType & AccessType.Signed) != 0)
+                {
+                    EmitReadSx32Call(context, size);
+                }
+                else
+                {
+                    EmitReadZxCall(context, size);
+                }
+                //The write back to Rn is emitted before the loaded value is stored on Rt.
+                if (op.WBack)
+                {
+                    context.EmitLdtmp();
+
+                    EmitStoreToRegister(context, op.Rn);
+                }
+                //Double word loads split the 64 bits value between Rt and Rt | 1.
+                if (size == DWordSizeLog2)
+                {
+                    context.Emit(OpCodes.Dup);
+                    //The E flag selects which half of the value goes to each register.
+                    context.EmitLdflg((int)PState.EBit);
+
+                    ILLabel lblBigEndian = new ILLabel();
+                    ILLabel lblEnd       = new ILLabel();
+
+                    context.Emit(OpCodes.Brtrue_S, lblBigEndian);
+
+                    //Little endian mode: low word goes to Rt, high word goes to Rt | 1.
+                    context.Emit(OpCodes.Conv_U4);
+
+                    EmitStoreToRegister(context, op.Rt);
+
+                    context.EmitLsr(32);
+
+                    context.Emit(OpCodes.Conv_U4);
+
+                    EmitStoreToRegister(context, op.Rt | 1);
+
+                    context.Emit(OpCodes.Br_S, lblEnd);
+
+                    //Big endian mode: high word goes to Rt, low word goes to Rt | 1.
+                    context.MarkLabel(lblBigEndian);
+
+                    context.EmitLsr(32);
+
+                    context.Emit(OpCodes.Conv_U4);
+
+                    EmitStoreToRegister(context, op.Rt);
+
+                    context.Emit(OpCodes.Conv_U4);
+
+                    EmitStoreToRegister(context, op.Rt | 1);
+
+                    context.MarkLabel(lblEnd);
+                }
+                else
+                {
+                    EmitStoreToRegister(context, op.Rt);
+                }
+            }
+            else
+            {
+                if (op.WBack)
+                {
+                    context.EmitLdtmp();
+
+                    EmitStoreToRegister(context, op.Rn);
+                }
+                //The value to store is read from Rt after the write back to Rn.
+                EmitLoadFromRegister(context, op.Rt);
+                //Double word stores combine Rt and Rt | 1 on a single 64 bits value.
+                if (size == DWordSizeLog2)
+                {
+                    context.Emit(OpCodes.Conv_U8);
+
+                    context.EmitLdflg((int)PState.EBit);
+
+                    ILLabel lblBigEndian = new ILLabel();
+                    ILLabel lblEnd       = new ILLabel();
+
+                    context.Emit(OpCodes.Brtrue_S, lblBigEndian);
+
+                    //Little endian mode: Rt is the low word, Rt | 1 is the high word.
+                    EmitLoadFromRegister(context, op.Rt | 1);
+
+                    context.Emit(OpCodes.Conv_U8);
+
+                    context.EmitLsl(32);
+
+                    context.Emit(OpCodes.Or);
+
+                    context.Emit(OpCodes.Br_S, lblEnd);
+
+                    //Big endian mode: Rt is the high word, Rt | 1 is the low word.
+                    context.MarkLabel(lblBigEndian);
+
+                    context.EmitLsl(32);
+
+                    EmitLoadFromRegister(context, op.Rt | 1);
+
+                    context.Emit(OpCodes.Conv_U8);
+
+                    context.Emit(OpCodes.Or);
+
+                    context.MarkLabel(lblEnd);
+                }
+
+                EmitWriteCall(context, size);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/OpCodeTable.cs b/ChocolArm64/OpCodeTable.cs
index db7a4ca9e8..3a8d3948d8 100644
--- a/ChocolArm64/OpCodeTable.cs
+++ b/ChocolArm64/OpCodeTable.cs
@@ -37,18 +37,32 @@ namespace ChocolArm64
         {
 #region "OpCode Table (AArch32)"
             //Integer
-            SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstEmit32.Add, typeof(OpCodeAluImm32));
-            SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstEmit32.Add, typeof(OpCodeAluRsImm32));
-            SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstEmit32.B,   typeof(OpCodeBImm32));
-            SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstEmit32.Bl,  typeof(OpCodeBImm32));
-            SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstEmit32.Blx, typeof(OpCodeBImm32));
-            SetA32("<<<<000100101111111111110001xxxx", InstEmit32.Bx,  typeof(OpCodeBReg32));
-            SetT32(                "010001110xxxx000", InstEmit32.Bx,  typeof(OpCodeBRegT16));
-            SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstEmit32.Mov, typeof(OpCodeAluImm32));
-            SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstEmit32.Mov, typeof(OpCodeAluRsImm32));
-            SetT32(                "00100xxxxxxxxxxx", InstEmit32.Mov, typeof(OpCodeAluImm8T16));
-            SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstEmit32.Sub, typeof(OpCodeAluImm32));
-            SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstEmit32.Sub, typeof(OpCodeAluRsImm32));
+            SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstEmit32.Add,           typeof(OpCode32AluImm));
+            SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstEmit32.Add,           typeof(OpCode32AluRsImm));
+            SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstEmit32.B,             typeof(OpCode32BImm));
+            SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstEmit32.Bl,            typeof(OpCode32BImm));
+            SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstEmit32.Blx,           typeof(OpCode32BImm));
+            SetA32("<<<<000100101111111111110001xxxx", InstEmit32.Bx,            typeof(OpCode32BReg));
+            SetT32("010001110xxxx000",                 InstEmit32.Bx,            typeof(OpCodeT16BReg));
+            SetA32("<<<<00110101xxxx0000xxxxxxxxxxxx", InstEmit32.Cmp,           typeof(OpCode32AluImm));
+            SetA32("<<<<00010101xxxx0000xxxxxxx0xxxx", InstEmit32.Cmp,           typeof(OpCode32AluRsImm));
+            SetA32("<<<<100xx0x1xxxxxxxxxxxxxxxxxxxx", InstEmit32.Ldm,           typeof(OpCode32MemMult));
+            SetA32("<<<<010xx0x1xxxxxxxxxxxxxxxxxxxx", InstEmit32.Ldr,           typeof(OpCode32MemImm));
+            SetA32("<<<<010xx1x1xxxxxxxxxxxxxxxxxxxx", InstEmit32.Ldrb,          typeof(OpCode32MemImm));
+            SetA32("<<<<000xx1x0xxxxxxxxxxxx1101xxxx", InstEmit32.Ldrd,          typeof(OpCode32MemImm8));
+            SetA32("<<<<000xx1x1xxxxxxxxxxxx1011xxxx", InstEmit32.Ldrh,          typeof(OpCode32MemImm8));
+            SetA32("<<<<000xx1x1xxxxxxxxxxxx1101xxxx", InstEmit32.Ldrsb,         typeof(OpCode32MemImm8));
+            SetA32("<<<<000xx1x1xxxxxxxxxxxx1111xxxx", InstEmit32.Ldrsh,         typeof(OpCode32MemImm8));
+            SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstEmit32.Mov,           typeof(OpCode32AluImm));
+            SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstEmit32.Mov,           typeof(OpCode32AluRsImm));
+            SetT32("00100xxxxxxxxxxx",                 InstEmit32.Mov,           typeof(OpCodeT16AluImm8));
+            SetA32("<<<<100xx0x0xxxxxxxxxxxxxxxxxxxx", InstEmit32.Stm,           typeof(OpCode32MemMult));
+            SetA32("<<<<010xx0x0xxxxxxxxxxxxxxxxxxxx", InstEmit32.Str,           typeof(OpCode32MemImm));
+            SetA32("<<<<010xx1x0xxxxxxxxxxxxxxxxxxxx", InstEmit32.Strb,          typeof(OpCode32MemImm));
+            SetA32("<<<<000xx1x0xxxxxxxxxxxx1111xxxx", InstEmit32.Strd,          typeof(OpCode32MemImm8));
+            SetA32("<<<<000xx1x0xxxxxxxxxxxx1011xxxx", InstEmit32.Strh,          typeof(OpCode32MemImm8));
+            SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstEmit32.Sub,           typeof(OpCode32AluImm));
+            SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstEmit32.Sub,           typeof(OpCode32AluRsImm));
 #endregion
 
 #region "OpCode Table (AArch64)"
diff --git a/ChocolArm64/State/CpuThreadState.cs b/ChocolArm64/State/CpuThreadState.cs
index 6c00bf482f..12edc429a0 100644
--- a/ChocolArm64/State/CpuThreadState.cs
+++ b/ChocolArm64/State/CpuThreadState.cs
@@ -13,8 +13,6 @@ namespace ChocolArm64.State
 
         private const int MinInstForCheck = 4000000;
 
-        public bool Thumb;
-
         public ulong X0,  X1,  X2,  X3,  X4,  X5,  X6,  X7,
                      X8,  X9,  X10, X11, X12, X13, X14, X15,
                      X16, X17, X18, X19, X20, X21, X22, X23,
@@ -25,13 +23,16 @@ namespace ChocolArm64.State
                                 V16, V17, V18, V19, V20, V21, V22, V23,
                                 V24, V25, V26, V27, V28, V29, V30, V31;
 
+        public bool Aarch32;
+
+        public bool Thumb;
+        public bool BigEndian;
+
         public bool Overflow;
         public bool Carry;
         public bool Zero;
         public bool Negative;
 
-        public bool IsAarch32;
-
         public int ElrHyp;
 
         public bool Running { get; set; }
@@ -51,10 +52,10 @@ namespace ChocolArm64.State
         {
             get
             {
-                return (Negative ? (int)PState.N : 0) |
-                       (Zero     ? (int)PState.Z : 0) |
-                       (Carry    ? (int)PState.C : 0) |
-                       (Overflow ? (int)PState.V : 0);
+                return (Negative ? (int)PState.NMask : 0) |
+                       (Zero     ? (int)PState.ZMask : 0) |
+                       (Carry    ? (int)PState.CMask : 0) |
+                       (Overflow ? (int)PState.VMask : 0);
             }
         }
 
@@ -140,7 +141,7 @@ namespace ChocolArm64.State
 
         internal ExecutionMode GetExecutionMode()
         {
-            if (!IsAarch32)
+            if (!Aarch32)
             {
                 return ExecutionMode.Aarch64;
             }
diff --git a/ChocolArm64/State/PState.cs b/ChocolArm64/State/PState.cs
index aef5f53b10..053a5357c1 100644
--- a/ChocolArm64/State/PState.cs
+++ b/ChocolArm64/State/PState.cs
@@ -6,22 +6,19 @@ namespace ChocolArm64.State
     enum PState
     {
         TBit = 5,
+        EBit = 9, //Endianness flag bit index, backed by CpuThreadState.BigEndian.
 
         VBit = 28,
         CBit = 29,
         ZBit = 30,
         NBit = 31,
 
-        T = 1 << TBit,
+        TMask = 1 << TBit, //The *Mask members are single-bit masks built from the *Bit indices above.
+        EMask = 1 << EBit,
 
-        V = 1 << VBit,
-        C = 1 << CBit,
-        Z = 1 << ZBit,
-        N = 1 << NBit,
-
-        Nz = N | Z,
-        Cv = C | V,
-
-        Nzcv = Nz | Cv
+        VMask = 1 << VBit,
+        CMask = 1 << CBit,
+        ZMask = 1 << ZBit,
+        NMask = 1 << NBit
     }
 }
diff --git a/ChocolArm64/State/Register.cs b/ChocolArm64/State/Register.cs
index 34588231aa..12c3f5c34c 100644
--- a/ChocolArm64/State/Register.cs
+++ b/ChocolArm64/State/Register.cs
@@ -44,6 +44,7 @@ namespace ChocolArm64.State
             switch ((PState)Index)
             {
                 case PState.TBit: return GetField(nameof(CpuThreadState.Thumb));
+                case PState.EBit: return GetField(nameof(CpuThreadState.BigEndian));
 
                 case PState.VBit: return GetField(nameof(CpuThreadState.Overflow));
                 case PState.CBit: return GetField(nameof(CpuThreadState.Carry));
diff --git a/ChocolArm64/Translation/ILEmitterCtx.cs b/ChocolArm64/Translation/ILEmitterCtx.cs
index d4bd93abce..b5ebff75bd 100644
--- a/ChocolArm64/Translation/ILEmitterCtx.cs
+++ b/ChocolArm64/Translation/ILEmitterCtx.cs
@@ -530,7 +530,15 @@ namespace ChocolArm64.Translation
         public void EmitLdflg(int index) => Ldloc(index, IoType.Flag);
         public void EmitStflg(int index)
         {
-            _optOpLastFlagSet = CurrOp;
+            //Record the current op as the last flag setter only when one of the
+            //NZCV flag bits (index >= VBit) is written. This ensures that, when
+            //emitting a direct IL branch instruction for compare + branch sequences,
+            //we're not expecting to use comparison values from an old instruction when
+            //in fact the flags were already overwritten by another instruction further along.
+            if (index >= (int)PState.VBit)
+            {
+                _optOpLastFlagSet = CurrOp;
+            }
 
             Stloc(index, IoType.Flag);
         }
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
index c29b0fbc1c..7eb27efc12 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
@@ -152,10 +152,20 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
 
             Context = new CpuThread(owner.Translator, owner.CpuMemory, (long)entrypoint);
 
-            Context.ThreadState.IsAarch32 = (Owner.MmuFlags & 1) == 0;
+            bool isAarch32 = (Owner.MmuFlags & 1) == 0;
+
+            Context.ThreadState.Aarch32 = isAarch32;
 
             Context.ThreadState.X0  = argsPtr;
-            Context.ThreadState.X31 = stackTop;
+
+            if (isAarch32)
+            {
+                Context.ThreadState.X13 = (uint)stackTop;
+            }
+            else
+            {
+                Context.ThreadState.X31 = stackTop;
+            }
 
             Context.ThreadState.CntfrqEl0 = 19200000;
             Context.ThreadState.Tpidr     = (long)_tlsAddress;