diff --git a/ChocolArm64/Instructions/InstEmitFlow.cs b/ChocolArm64/Instructions/InstEmitFlow.cs
index a842dca9d1..5eae89cc09 100644
--- a/ChocolArm64/Instructions/InstEmitFlow.cs
+++ b/ChocolArm64/Instructions/InstEmitFlow.cs
@@ -39,7 +39,6 @@ namespace ChocolArm64.Instructions
 
             context.EmitLdc_I(op.Position + 4);
             context.EmitStint(RegisterAlias.Lr);
-            context.EmitStoreState();
 
             EmitCall(context, op.Imm);
         }
@@ -60,6 +59,8 @@ namespace ChocolArm64.Instructions
         {
             OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;
 
+            context.HasIndirectJump = true;
+
             context.EmitStoreState();
             context.EmitLdintzr(op.Rn);
 
diff --git a/ChocolArm64/Instructions/InstEmitFlow32.cs b/ChocolArm64/Instructions/InstEmitFlow32.cs
index 61f1d34c53..dea490c775 100644
--- a/ChocolArm64/Instructions/InstEmitFlow32.cs
+++ b/ChocolArm64/Instructions/InstEmitFlow32.cs
@@ -65,7 +65,6 @@ namespace ChocolArm64.Instructions
             }
 
             context.EmitStint(GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr));
-            context.EmitStoreState();
 
             //If x is true, then this is a branch with link and exchange.
             //In this case we need to swap the mode between Arm <-> Thumb.
diff --git a/ChocolArm64/Instructions/InstEmitFlowHelper.cs b/ChocolArm64/Instructions/InstEmitFlowHelper.cs
index e93ef42679..a6091a5711 100644
--- a/ChocolArm64/Instructions/InstEmitFlowHelper.cs
+++ b/ChocolArm64/Instructions/InstEmitFlowHelper.cs
@@ -11,6 +11,8 @@ namespace ChocolArm64.Instructions
         {
             if (context.Tier == TranslationTier.Tier0)
             {
+                context.EmitStoreState();
+
                 context.TranslateAhead(imm);
 
                 context.EmitLdc_I8(imm);
@@ -22,6 +24,10 @@ namespace ChocolArm64.Instructions
 
             if (!context.TryOptEmitSubroutineCall())
             {
+                context.HasSlowCall = true;
+
+                context.EmitStoreState();
+
                 context.TranslateAhead(imm);
 
                 context.EmitLdarg(TranslatedSub.StateArgIdx);
@@ -32,6 +38,7 @@ namespace ChocolArm64.Instructions
 
                 context.EmitLdarg(TranslatedSub.StateArgIdx);
                 context.EmitLdc_I8(imm);
+                context.EmitLdc_I4((int)CallType.Call);
 
                 context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
 
@@ -58,20 +65,6 @@ namespace ChocolArm64.Instructions
         {
             if (context.Tier == TranslationTier.Tier0)
             {
-                context.Emit(OpCodes.Dup);
-
-                context.EmitSttmp();
-                context.EmitLdarg(TranslatedSub.StateArgIdx);
-
-                context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
-                    BindingFlags.Instance |
-                    BindingFlags.NonPublic));
-
-                context.EmitLdarg(TranslatedSub.StateArgIdx);
-                context.EmitLdtmp();
-
-                context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine));
-
                 context.Emit(OpCodes.Ret);
             }
             else
@@ -85,8 +78,11 @@ namespace ChocolArm64.Instructions
 
                 context.EmitLdarg(TranslatedSub.StateArgIdx);
                 context.EmitLdtmp();
+                context.EmitLdc_I4(isJump
+                    ? (int)CallType.VirtualJump
+                    : (int)CallType.VirtualCall);
 
-                context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine));
+                context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
 
                 context.EmitLdarg(TranslatedSub.StateArgIdx);
                 context.EmitLdarg(TranslatedSub.MemoryArgIdx);
diff --git a/ChocolArm64/Optimizations.cs b/ChocolArm64/Optimizations.cs
index 8fa6f4626c..cbb8131f5c 100644
--- a/ChocolArm64/Optimizations.cs
+++ b/ChocolArm64/Optimizations.cs
@@ -2,21 +2,23 @@ using System.Runtime.Intrinsics.X86;
 
 public static class Optimizations
 {
-    internal static bool FastFP = true;
+    public static bool AssumeStrictAbiCompliance { get; set; }
 
-    private static bool _useAllSseIfAvailable = true;
+    public static bool FastFP { get; set; } = true;
 
-    private static bool _useSseIfAvailable   = true;
-    private static bool _useSse2IfAvailable  = true;
-    private static bool _useSse3IfAvailable  = true;
-    private static bool _useSsse3IfAvailable = true;
-    private static bool _useSse41IfAvailable = true;
-    private static bool _useSse42IfAvailable = true;
+    private const bool UseAllSseIfAvailable = true;
 
-    internal static bool UseSse   = (_useAllSseIfAvailable && _useSseIfAvailable)   && Sse.IsSupported;
-    internal static bool UseSse2  = (_useAllSseIfAvailable && _useSse2IfAvailable)  && Sse2.IsSupported;
-    internal static bool UseSse3  = (_useAllSseIfAvailable && _useSse3IfAvailable)  && Sse3.IsSupported;
-    internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported;
-    internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported;
-    internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported;
-}
+    public static bool UseSseIfAvailable   { get; set; } = UseAllSseIfAvailable;
+    public static bool UseSse2IfAvailable  { get; set; } = UseAllSseIfAvailable;
+    public static bool UseSse3IfAvailable  { get; set; } = UseAllSseIfAvailable;
+    public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable;
+    public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable;
+    public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable;
+
+    internal static bool UseSse   => UseSseIfAvailable   && Sse.IsSupported;
+    internal static bool UseSse2  => UseSse2IfAvailable  && Sse2.IsSupported;
+    internal static bool UseSse3  => UseSse3IfAvailable  && Sse3.IsSupported;
+    internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported;
+    internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported;
+    internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported;
+}
\ No newline at end of file
diff --git a/ChocolArm64/Translation/CallType.cs b/ChocolArm64/Translation/CallType.cs
new file mode 100644
index 0000000000..937ede768a
--- /dev/null
+++ b/ChocolArm64/Translation/CallType.cs
@@ -0,0 +1,9 @@
+namespace ChocolArm64.Translation
+{
+    enum CallType
+    {
+        Call,
+        VirtualCall,
+        VirtualJump
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILBlock.cs b/ChocolArm64/Translation/ILBlock.cs
index 136579012b..12773705a1 100644
--- a/ChocolArm64/Translation/ILBlock.cs
+++ b/ChocolArm64/Translation/ILBlock.cs
@@ -4,13 +4,13 @@ namespace ChocolArm64.Translation
 {
     class ILBlock : IILEmit
     {
-        public long IntInputs    { get; private set; }
-        public long IntOutputs   { get; private set; }
-        public long IntAwOutputs { get; private set; }
+        public  long IntInputs  { get; private set; }
+        public  long IntOutputs { get; private set; }
+        private long _intAwOutputs;
 
-        public long VecInputs    { get; private set; }
-        public long VecOutputs   { get; private set; }
-        public long VecAwOutputs { get; private set; }
+        public  long VecInputs  { get; private set; }
+        public  long VecOutputs { get; private set; }
+        private long _vecAwOutputs;
 
         public bool HasStateStore { get; private set; }
 
@@ -34,25 +34,25 @@ namespace ChocolArm64.Translation
                 //opcodes emitted by each ARM instruction.
                 //We can only consider the new outputs for doing input elimination
                 //after all the CIL opcodes used by the instruction being emitted.
-                IntAwOutputs = IntOutputs;
-                VecAwOutputs = VecOutputs;
+                _intAwOutputs = IntOutputs;
+                _vecAwOutputs = VecOutputs;
             }
             else if (emitter is ILOpCodeLoad ld && ILMethodBuilder.IsRegIndex(ld.Index))
             {
-                switch (ld.IoType)
+                switch (ld.VarType)
                 {
-                    case IoType.Flag:   IntInputs |= ((1L << ld.Index) << 32) & ~IntAwOutputs; break;
-                    case IoType.Int:    IntInputs |=  (1L << ld.Index)        & ~IntAwOutputs; break;
-                    case IoType.Vector: VecInputs |=  (1L << ld.Index)        & ~VecAwOutputs; break;
+                    case VarType.Flag:   IntInputs |= ((1L << ld.Index) << 32) & ~_intAwOutputs; break;
+                    case VarType.Int:    IntInputs |=  (1L << ld.Index)        & ~_intAwOutputs; break;
+                    case VarType.Vector: VecInputs |=  (1L << ld.Index)        & ~_vecAwOutputs; break;
                 }
             }
             else if (emitter is ILOpCodeStore st && ILMethodBuilder.IsRegIndex(st.Index))
             {
-                switch (st.IoType)
+                switch (st.VarType)
                 {
-                    case IoType.Flag:   IntOutputs |= (1L << st.Index) << 32; break;
-                    case IoType.Int:    IntOutputs |=  1L << st.Index;        break;
-                    case IoType.Vector: VecOutputs |=  1L << st.Index;        break;
+                    case VarType.Flag:   IntOutputs |= (1L << st.Index) << 32; break;
+                    case VarType.Int:    IntOutputs |=  1L << st.Index;        break;
+                    case VarType.Vector: VecOutputs |=  1L << st.Index;        break;
                 }
             }
             else if (emitter is ILOpCodeStoreState)
diff --git a/ChocolArm64/Translation/ILEmitterCtx.cs b/ChocolArm64/Translation/ILEmitterCtx.cs
index f7e61bc999..91b72b13ae 100644
--- a/ChocolArm64/Translation/ILEmitterCtx.cs
+++ b/ChocolArm64/Translation/ILEmitterCtx.cs
@@ -31,6 +31,10 @@ namespace ChocolArm64.Translation
 
         public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO
 
+        public bool HasIndirectJump { get; set; }
+
+        public bool HasSlowCall { get; set; }
+
         private Dictionary<Block, ILBlock> _visitedBlocks;
 
         private Queue<Block> _branchTargets;
@@ -91,7 +95,12 @@ namespace ChocolArm64.Translation
 
             ResetBlockState();
 
-            AdvanceOpCode();
+            if (AdvanceOpCode())
+            {
+                EmitSynchronization();
+
+                _ilBlock.Add(new ILOpCodeLoadState(_ilBlock, isSubEntry: true));
+            }
         }
 
         public static int GetIntTempIndex()
@@ -127,10 +136,18 @@ namespace ChocolArm64.Translation
                 return;
             }
 
-            if (_opcIndex == 0)
+            int opcIndex = _opcIndex;
+
+            if (opcIndex == 0)
             {
                 MarkLabel(GetLabel(_currBlock.Position));
+            }
 
+            bool isLastOp = opcIndex == CurrBlock.OpCodes.Count - 1;
+
+            if (isLastOp && CurrBlock.Branch != null &&
+                     (ulong)CurrBlock.Branch.Position <= (ulong)CurrBlock.Position)
+            {
                 EmitSynchronization();
             }
 
@@ -161,7 +178,7 @@ namespace ChocolArm64.Translation
                 //of the next instruction to be executed (in the case that the condition
                 //is false, and the branch was not taken, as all basic blocks should end with
                 //some kind of branch).
-                if (CurrOp == CurrBlock.GetLastOp() && CurrBlock.Next == null)
+                if (isLastOp && CurrBlock.Next == null)
                 {
                     EmitStoreState();
                     EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes);
@@ -285,32 +302,43 @@ namespace ChocolArm64.Translation
                 return;
             }
 
-            _queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1));
+            _queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true);
         }
 
         public bool TryOptEmitSubroutineCall()
         {
+            //Calls should always have a next block, unless
+            //we're translating a single basic block.
             if (_currBlock.Next == null)
             {
                 return false;
             }
 
-            if (CurrOp.Emitter != InstEmit.Bl)
+            if (!(CurrOp is IOpCodeBImm op))
             {
                 return false;
             }
 
-            if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub subroutine))
+            if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub))
             {
                 return false;
             }
 
+            //It's not worth calling a Tier0 method, because
+            //it contains slow code, rather than the entire function.
+            if (sub.Tier == TranslationTier.Tier0)
+            {
+                return false;
+            }
+
+            EmitStoreState(sub);
+
             for (int index = 0; index < TranslatedSub.FixedArgTypes.Length; index++)
             {
                 EmitLdarg(index);
             }
 
-            EmitCall(subroutine.Method);
+            EmitCall(sub.Method);
 
             return true;
         }
@@ -321,8 +349,8 @@ namespace ChocolArm64.Translation
 
             InstEmitAluHelper.EmitAluLoadOpers(this);
 
-            Stloc(CmpOptTmp2Index, IoType.Int);
-            Stloc(CmpOptTmp1Index, IoType.Int);
+            Stloc(CmpOptTmp2Index, VarType.Int);
+            Stloc(CmpOptTmp1Index, VarType.Int);
         }
 
         private Dictionary<Condition, OpCode> _branchOps = new Dictionary<Condition, OpCode>()
@@ -346,8 +374,8 @@ namespace ChocolArm64.Translation
             {
                 if (_optOpLastCompare.Emitter == InstEmit.Subs)
                 {
-                    Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize);
-                    Ldloc(CmpOptTmp2Index, IoType.Int, _optOpLastCompare.RegisterSize);
+                    Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
+                    Ldloc(CmpOptTmp2Index, VarType.Int, _optOpLastCompare.RegisterSize);
 
                     Emit(_branchOps[cond], target);
 
@@ -369,7 +397,7 @@ namespace ChocolArm64.Translation
                     //Such invalid values can't be encoded on the immediate encodings.
                     if (_optOpLastCompare is IOpCodeAluImm64 op)
                     {
-                        Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize);
+                        Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
 
                         if (_optOpLastCompare.RegisterSize == RegisterSize.Int32)
                         {
@@ -491,14 +519,14 @@ namespace ChocolArm64.Translation
         {
             if (amount > 0)
             {
-                Stloc(RorTmpIndex, IoType.Int);
-                Ldloc(RorTmpIndex, IoType.Int);
+                Stloc(RorTmpIndex, VarType.Int);
+                Ldloc(RorTmpIndex, VarType.Int);
 
                 EmitLdc_I4(amount);
 
                 Emit(OpCodes.Shr_Un);
 
-                Ldloc(RorTmpIndex, IoType.Int);
+                Ldloc(RorTmpIndex, VarType.Int);
 
                 EmitLdc_I4(CurrOp.GetBitsCount() - amount);
 
@@ -546,7 +574,7 @@ namespace ChocolArm64.Translation
 
         public void EmitLdarg(int index)
         {
-            _ilBlock.Add(new ILOpCodeLoad(index, IoType.Arg));
+            _ilBlock.Add(new ILOpCodeLoad(index, VarType.Arg));
         }
 
         public void EmitLdintzr(int index)
@@ -588,6 +616,11 @@ namespace ChocolArm64.Translation
             _ilBlock.Add(new ILOpCodeStoreState(_ilBlock));
         }
 
+        private void EmitStoreState(TranslatedSub callSub)
+        {
+            _ilBlock.Add(new ILOpCodeStoreState(_ilBlock, callSub));
+        }
+
         public void EmitLdtmp() => EmitLdint(IntGpTmp1Index);
         public void EmitSttmp() => EmitStint(IntGpTmp1Index);
 
@@ -600,13 +633,13 @@ namespace ChocolArm64.Translation
         public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index);
         public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index);
 
-        public void EmitLdint(int index) => Ldloc(index, IoType.Int);
-        public void EmitStint(int index) => Stloc(index, IoType.Int);
+        public void EmitLdint(int index) => Ldloc(index, VarType.Int);
+        public void EmitStint(int index) => Stloc(index, VarType.Int);
 
-        public void EmitLdvec(int index) => Ldloc(index, IoType.Vector);
-        public void EmitStvec(int index) => Stloc(index, IoType.Vector);
+        public void EmitLdvec(int index) => Ldloc(index, VarType.Vector);
+        public void EmitStvec(int index) => Stloc(index, VarType.Vector);
 
-        public void EmitLdflg(int index) => Ldloc(index, IoType.Flag);
+        public void EmitLdflg(int index) => Ldloc(index, VarType.Flag);
         public void EmitStflg(int index)
         {
             //Set this only if any of the NZCV flag bits were modified.
@@ -619,22 +652,22 @@ namespace ChocolArm64.Translation
                 _optOpLastFlagSet = CurrOp;
             }
 
-            Stloc(index, IoType.Flag);
+            Stloc(index, VarType.Flag);
         }
 
-        private void Ldloc(int index, IoType ioType)
+        private void Ldloc(int index, VarType varType)
         {
-            _ilBlock.Add(new ILOpCodeLoad(index, ioType, CurrOp.RegisterSize));
+            _ilBlock.Add(new ILOpCodeLoad(index, varType, CurrOp.RegisterSize));
         }
 
-        private void Ldloc(int index, IoType ioType, RegisterSize registerSize)
+        private void Ldloc(int index, VarType varType, RegisterSize registerSize)
         {
-            _ilBlock.Add(new ILOpCodeLoad(index, ioType, registerSize));
+            _ilBlock.Add(new ILOpCodeLoad(index, varType, registerSize));
         }
 
-        private void Stloc(int index, IoType ioType)
+        private void Stloc(int index, VarType varType)
         {
-            _ilBlock.Add(new ILOpCodeStore(index, ioType, CurrOp.RegisterSize));
+            _ilBlock.Add(new ILOpCodeStore(index, varType, CurrOp.RegisterSize));
         }
 
         public void EmitCallPropGet(Type objType, string propName)
diff --git a/ChocolArm64/Translation/ILLabel.cs b/ChocolArm64/Translation/ILLabel.cs
index f423a4256c..17a31783df 100644
--- a/ChocolArm64/Translation/ILLabel.cs
+++ b/ChocolArm64/Translation/ILLabel.cs
@@ -6,7 +6,7 @@ namespace ChocolArm64.Translation
     {
         private bool _hasLabel;
 
-        private Label _lbl;
+        private Label _label;
 
         public void Emit(ILMethodBuilder context)
         {
@@ -17,12 +17,12 @@ namespace ChocolArm64.Translation
         {
             if (!_hasLabel)
             {
-                _lbl = context.Generator.DefineLabel();
+                _label = context.Generator.DefineLabel();
 
                 _hasLabel = true;
             }
 
-            return _lbl;
+            return _label;
         }
     }
 }
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILMethodBuilder.cs b/ChocolArm64/Translation/ILMethodBuilder.cs
index 892f831be3..98b5052043 100644
--- a/ChocolArm64/Translation/ILMethodBuilder.cs
+++ b/ChocolArm64/Translation/ILMethodBuilder.cs
@@ -8,7 +8,10 @@ namespace ChocolArm64.Translation
 {
     class ILMethodBuilder
     {
-        public LocalAlloc LocalAlloc { get; private set; }
+        private const int RegsCount = 32;
+        private const int RegsMask  = RegsCount - 1;
+
+        public RegisterUsage RegUsage { get; private set; }
 
         public ILGenerator Generator { get; private set; }
 
@@ -18,29 +21,47 @@ namespace ChocolArm64.Translation
 
         private string _subName;
 
+        public bool IsAarch64 { get; }
+
+        public bool IsSubComplete { get; }
+
         private int _localsCount;
 
-        public ILMethodBuilder(ILBlock[] ilBlocks, string subName)
+        public ILMethodBuilder(
+            ILBlock[] ilBlocks,
+            string    subName,
+            bool      isAarch64,
+            bool      isSubComplete = false)
         {
-            _ilBlocks = ilBlocks;
-            _subName  = subName;
+            _ilBlocks     = ilBlocks;
+            _subName      = subName;
+            IsAarch64     = isAarch64;
+            IsSubComplete = isSubComplete;
         }
 
-        public TranslatedSub GetSubroutine(TranslationTier tier)
+        public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing)
         {
-            LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]);
+            RegUsage = new RegisterUsage();
+
+            RegUsage.BuildUses(_ilBlocks[0]);
 
             DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes);
 
-            Generator = method.GetILGenerator();
+            long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]);
+            long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]);
 
-            TranslatedSub subroutine = new TranslatedSub(method, tier);
+            TranslatedSub subroutine = new TranslatedSub(
+                method,
+                intNiRegsMask,
+                vecNiRegsMask,
+                tier,
+                isWorthOptimizing);
 
             _locals = new Dictionary<Register, int>();
 
             _localsCount = 0;
 
-            new ILOpCodeLoadState(_ilBlocks[0]).Emit(this);
+            Generator = method.GetILGenerator();
 
             foreach (ILBlock ilBlock in _ilBlocks)
             {
@@ -80,13 +101,13 @@ namespace ChocolArm64.Translation
 
         public static Register GetRegFromBit(int bit, RegisterType baseType)
         {
-            if (bit < 32)
+            if (bit < RegsCount)
             {
                 return new Register(bit, baseType);
             }
             else if (baseType == RegisterType.Int)
             {
-                return new Register(bit & 0x1f, RegisterType.Flag);
+                return new Register(bit & RegsMask, RegisterType.Flag);
             }
             else
             {
@@ -96,7 +117,7 @@ namespace ChocolArm64.Translation
 
         public static bool IsRegIndex(int index)
         {
-            return (uint)index < 32;
+            return (uint)index < RegsCount;
         }
     }
 }
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCode.cs b/ChocolArm64/Translation/ILOpCode.cs
index 4021603c01..486452820d 100644
--- a/ChocolArm64/Translation/ILOpCode.cs
+++ b/ChocolArm64/Translation/ILOpCode.cs
@@ -4,16 +4,16 @@ namespace ChocolArm64.Translation
 {
     struct ILOpCode : IILEmit
     {
-        private OpCode _ilOp;
+        public OpCode ILOp { get; }
 
         public ILOpCode(OpCode ilOp)
         {
-            _ilOp = ilOp;
+            ILOp = ilOp;
         }
 
         public void Emit(ILMethodBuilder context)
         {
-            context.Generator.Emit(_ilOp);
+            context.Generator.Emit(ILOp);
         }
     }
 }
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCodeBranch.cs b/ChocolArm64/Translation/ILOpCodeBranch.cs
index 22b80b5d52..9d4e40fa9d 100644
--- a/ChocolArm64/Translation/ILOpCodeBranch.cs
+++ b/ChocolArm64/Translation/ILOpCodeBranch.cs
@@ -4,18 +4,18 @@ namespace ChocolArm64.Translation
 {
     struct ILOpCodeBranch : IILEmit
     {
-        private OpCode   _ilOp;
-        private ILLabel _label;
+        public OpCode  ILOp  { get; }
+        public ILLabel Label { get; }
 
         public ILOpCodeBranch(OpCode ilOp, ILLabel label)
         {
-            _ilOp  = ilOp;
-            _label = label;
+            ILOp  = ilOp;
+            Label = label;
         }
 
         public void Emit(ILMethodBuilder context)
         {
-            context.Generator.Emit(_ilOp, _label.GetLabel(context));
+            context.Generator.Emit(ILOp, Label.GetLabel(context));
         }
     }
 }
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCodeCall.cs b/ChocolArm64/Translation/ILOpCodeCall.cs
index c046aeeb75..dc20417a9a 100644
--- a/ChocolArm64/Translation/ILOpCodeCall.cs
+++ b/ChocolArm64/Translation/ILOpCodeCall.cs
@@ -5,9 +5,9 @@ namespace ChocolArm64.Translation
 {
     struct ILOpCodeCall : IILEmit
     {
-        public MethodInfo Info { get; private set; }
+        public MethodInfo Info { get; }
 
-        public bool IsVirtual { get; private set; }
+        public bool IsVirtual { get; }
 
         public ILOpCodeCall(MethodInfo info, bool isVirtual)
         {
diff --git a/ChocolArm64/Translation/ILOpCodeConst.cs b/ChocolArm64/Translation/ILOpCodeConst.cs
index 2aaf8676ee..cd3b58ff04 100644
--- a/ChocolArm64/Translation/ILOpCodeConst.cs
+++ b/ChocolArm64/Translation/ILOpCodeConst.cs
@@ -16,6 +16,8 @@ namespace ChocolArm64.Translation
 
         private ImmVal _value;
 
+        public long Value => _value.I8;
+
         private enum ConstType
         {
             Int32,
diff --git a/ChocolArm64/Translation/ILOpCodeLoad.cs b/ChocolArm64/Translation/ILOpCodeLoad.cs
index c31e06bbd9..0d11eeaa4b 100644
--- a/ChocolArm64/Translation/ILOpCodeLoad.cs
+++ b/ChocolArm64/Translation/ILOpCodeLoad.cs
@@ -5,28 +5,28 @@ namespace ChocolArm64.Translation
 {
     struct ILOpCodeLoad : IILEmit
     {
-        public int Index { get; private set; }
+        public int Index { get; }
 
-        public IoType IoType { get; private set; }
+        public VarType VarType { get; }
 
-        public RegisterSize RegisterSize { get; private set; }
+        public RegisterSize RegisterSize { get; }
 
-        public ILOpCodeLoad(int index, IoType ioType, RegisterSize registerSize = 0)
+        public ILOpCodeLoad(int index, VarType varType, RegisterSize registerSize = 0)
         {
             Index        = index;
-            IoType       = ioType;
+            VarType      = varType;
             RegisterSize = registerSize;
         }
 
         public void Emit(ILMethodBuilder context)
         {
-            switch (IoType)
+            switch (VarType)
             {
-                case IoType.Arg: context.Generator.EmitLdarg(Index); break;
+                case VarType.Arg: context.Generator.EmitLdarg(Index); break;
 
-                case IoType.Flag:   EmitLdloc(context, Index, RegisterType.Flag);   break;
-                case IoType.Int:    EmitLdloc(context, Index, RegisterType.Int);    break;
-                case IoType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break;
+                case VarType.Flag:   EmitLdloc(context, Index, RegisterType.Flag);   break;
+                case VarType.Int:    EmitLdloc(context, Index, RegisterType.Int);    break;
+                case VarType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break;
             }
         }
 
diff --git a/ChocolArm64/Translation/ILOpCodeLoadField.cs b/ChocolArm64/Translation/ILOpCodeLoadField.cs
index abcd37c348..f0507ac226 100644
--- a/ChocolArm64/Translation/ILOpCodeLoadField.cs
+++ b/ChocolArm64/Translation/ILOpCodeLoadField.cs
@@ -5,7 +5,7 @@ namespace ChocolArm64.Translation
 {
     struct ILOpCodeLoadField : IILEmit
     {
-        public FieldInfo Info { get; private set; }
+        public FieldInfo Info { get; }
 
         public ILOpCodeLoadField(FieldInfo info)
         {
diff --git a/ChocolArm64/Translation/ILOpCodeLoadState.cs b/ChocolArm64/Translation/ILOpCodeLoadState.cs
index ddab611019..c23dc94329 100644
--- a/ChocolArm64/Translation/ILOpCodeLoadState.cs
+++ b/ChocolArm64/Translation/ILOpCodeLoadState.cs
@@ -7,15 +7,24 @@ namespace ChocolArm64.Translation
     {
         private ILBlock _block;
 
-        public ILOpCodeLoadState(ILBlock block)
+        private bool _isSubEntry;
+
+        public ILOpCodeLoadState(ILBlock block, bool isSubEntry = false)
         {
-            _block = block;
+            _block      = block;
+            _isSubEntry = isSubEntry;
         }
 
         public void Emit(ILMethodBuilder context)
         {
-            long intInputs = context.LocalAlloc.GetIntInputs(_block);
-            long vecInputs = context.LocalAlloc.GetVecInputs(_block);
+            long intInputs = context.RegUsage.GetIntInputs(_block);
+            long vecInputs = context.RegUsage.GetVecInputs(_block);
+
+            if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
+            {
+                intInputs = RegisterUsage.ClearCallerSavedIntRegs(intInputs, context.IsAarch64);
+                vecInputs = RegisterUsage.ClearCallerSavedVecRegs(vecInputs, context.IsAarch64);
+            }
 
             LoadLocals(context, intInputs, RegisterType.Int);
             LoadLocals(context, vecInputs, RegisterType.Vector);
diff --git a/ChocolArm64/Translation/ILOpCodeLog.cs b/ChocolArm64/Translation/ILOpCodeLog.cs
index ebb042b596..53846f927e 100644
--- a/ChocolArm64/Translation/ILOpCodeLog.cs
+++ b/ChocolArm64/Translation/ILOpCodeLog.cs
@@ -2,16 +2,16 @@ namespace ChocolArm64.Translation
 {
     struct ILOpCodeLog : IILEmit
     {
-        private string _text;
+        public string Text { get; }
 
         public ILOpCodeLog(string text)
         {
-            _text = text;
+            Text = text;
         }
 
         public void Emit(ILMethodBuilder context)
         {
-            context.Generator.EmitWriteLine(_text);
+            context.Generator.EmitWriteLine(Text);
         }
     }
 }
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCodeStore.cs b/ChocolArm64/Translation/ILOpCodeStore.cs
index 17a6259c6f..7ac78e9ae4 100644
--- a/ChocolArm64/Translation/ILOpCodeStore.cs
+++ b/ChocolArm64/Translation/ILOpCodeStore.cs
@@ -5,28 +5,28 @@ namespace ChocolArm64.Translation
 {
     struct ILOpCodeStore : IILEmit
     {
-        public int Index { get; private set; }
+        public int Index { get; }
 
-        public IoType IoType { get; private set; }
+        public VarType VarType { get; }
 
-        public RegisterSize RegisterSize { get; private set; }
+        public RegisterSize RegisterSize { get; }
 
-        public ILOpCodeStore(int index, IoType ioType, RegisterSize registerSize = 0)
+        public ILOpCodeStore(int index, VarType varType, RegisterSize registerSize = 0)
         {
             Index        = index;
-            IoType       = ioType;
+            VarType      = varType;
             RegisterSize = registerSize;
         }
 
         public void Emit(ILMethodBuilder context)
         {
-            switch (IoType)
+            switch (VarType)
             {
-                case IoType.Arg: context.Generator.EmitStarg(Index); break;
+                case VarType.Arg: context.Generator.EmitStarg(Index); break;
 
-                case IoType.Flag:   EmitStloc(context, Index, RegisterType.Flag);   break;
-                case IoType.Int:    EmitStloc(context, Index, RegisterType.Int);    break;
-                case IoType.Vector: EmitStloc(context, Index, RegisterType.Vector); break;
+                case VarType.Flag:   EmitStloc(context, Index, RegisterType.Flag);   break;
+                case VarType.Int:    EmitStloc(context, Index, RegisterType.Int);    break;
+                case VarType.Vector: EmitStloc(context, Index, RegisterType.Vector); break;
             }
         }
 
diff --git a/ChocolArm64/Translation/ILOpCodeStoreState.cs b/ChocolArm64/Translation/ILOpCodeStoreState.cs
index 458e9eda43..a587dbfe84 100644
--- a/ChocolArm64/Translation/ILOpCodeStoreState.cs
+++ b/ChocolArm64/Translation/ILOpCodeStoreState.cs
@@ -7,15 +7,33 @@ namespace ChocolArm64.Translation
     {
         private ILBlock _block;
 
-        public ILOpCodeStoreState(ILBlock block)
+        private TranslatedSub _callSub;
+
+        public ILOpCodeStoreState(ILBlock block, TranslatedSub callSub = null)
         {
-            _block = block;
+            _block   = block;
+            _callSub = callSub;
         }
 
         public void Emit(ILMethodBuilder context)
         {
-            long intOutputs = context.LocalAlloc.GetIntOutputs(_block);
-            long vecOutputs = context.LocalAlloc.GetVecOutputs(_block);
+            long intOutputs = context.RegUsage.GetIntOutputs(_block);
+            long vecOutputs = context.RegUsage.GetVecOutputs(_block);
+
+            if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
+            {
+                intOutputs = RegisterUsage.ClearCallerSavedIntRegs(intOutputs, context.IsAarch64);
+                vecOutputs = RegisterUsage.ClearCallerSavedVecRegs(vecOutputs, context.IsAarch64);
+            }
+
+            if (_callSub != null)
+            {
+                //Those registers are assigned in the callee function without
+                //their values being read first. We don't need to write them
+                //because they are not going to be read by the callee.
+                intOutputs &= ~_callSub.IntNiRegsMask;
+                vecOutputs &= ~_callSub.VecNiRegsMask;
+            }
 
             StoreLocals(context, intOutputs, RegisterType.Int);
             StoreLocals(context, vecOutputs, RegisterType.Vector);
diff --git a/ChocolArm64/Translation/LocalAlloc.cs b/ChocolArm64/Translation/RegisterUsage.cs
similarity index 56%
rename from ChocolArm64/Translation/LocalAlloc.cs
rename to ChocolArm64/Translation/RegisterUsage.cs
index 763be6190d..2e6829d512 100644
--- a/ChocolArm64/Translation/LocalAlloc.cs
+++ b/ChocolArm64/Translation/RegisterUsage.cs
@@ -3,8 +3,13 @@ using System.Collections.Generic;
 
 namespace ChocolArm64.Translation
 {
-    class LocalAlloc
+    class RegisterUsage
     {
+        public const long CallerSavedIntRegistersMask = 0x7fL  << 9;
+        public const long PStateNzcvFlagsMask         = 0xfL   << 60;
+
+        public const long CallerSavedVecRegistersMask = 0xffffL << 16;
+
         private class PathIo
         {
             private Dictionary<ILBlock, long> _allInputs;
@@ -18,31 +23,30 @@ namespace ChocolArm64.Translation
                 _cmnOutputs = new Dictionary<ILBlock, long>();
             }
 
-            public PathIo(ILBlock root, long inputs, long outputs) : this()
+            public void Set(ILBlock entry, long inputs, long outputs)
             {
-                Set(root, inputs, outputs);
-            }
-
-            public void Set(ILBlock root, long inputs, long outputs)
-            {
-                if (!_allInputs.TryAdd(root, inputs))
+                if (!_allInputs.TryAdd(entry, inputs))
                 {
-                    _allInputs[root] |= inputs;
+                    _allInputs[entry] |= inputs;
                 }
 
-                if (!_cmnOutputs.TryAdd(root, outputs))
+                if (!_cmnOutputs.TryAdd(entry, outputs))
                 {
-                    _cmnOutputs[root] &= outputs;
+                    _cmnOutputs[entry] &= outputs;
                 }
 
                 _allOutputs |= outputs;
             }
 
-            public long GetInputs(ILBlock root)
+            public long GetInputs(ILBlock entry)
             {
-                if (_allInputs.TryGetValue(root, out long inputs))
+                if (_allInputs.TryGetValue(entry, out long inputs))
                 {
-                    return inputs | (_allOutputs & ~_cmnOutputs[root]);
+                    //We also need to read the registers that may not be written
+                    //by all paths that can reach an exit point, to ensure that
+                    //the local variable will not remain uninitialized depending
+                    //on the flow path taken.
+                    return inputs | (_allOutputs & ~_cmnOutputs[entry]);
                 }
 
                 return 0;
@@ -57,15 +61,38 @@ namespace ChocolArm64.Translation
         private Dictionary<ILBlock, PathIo> _intPaths;
         private Dictionary<ILBlock, PathIo> _vecPaths;
 
-        private struct BlockIo
+        private struct BlockIo : IEquatable<BlockIo>
         {
-            public ILBlock Block;
-            public ILBlock Entry;
+            public ILBlock Block { get; }
+            public ILBlock Entry { get; }
 
-            public long IntInputs;
-            public long VecInputs;
-            public long IntOutputs;
-            public long VecOutputs;
+            public long IntInputs  { get; set; }
+            public long VecInputs  { get; set; }
+            public long IntOutputs { get; set; }
+            public long VecOutputs { get; set; }
+
+            public BlockIo(ILBlock block, ILBlock entry)
+            {
+                Block = block;
+                Entry = entry;
+
+                IntInputs = IntOutputs = 0;
+                VecInputs = VecOutputs = 0;
+            }
+
+            public BlockIo(
+                ILBlock block,
+                ILBlock entry,
+                long    intInputs,
+                long    vecInputs,
+                long    intOutputs,
+                long    vecOutputs) : this(block, entry)
+            {
+                IntInputs  = intInputs;
+                VecInputs  = vecInputs;
+                IntOutputs = intOutputs;
+                VecOutputs = vecOutputs;
+            }
 
             public override bool Equals(object obj)
             {
@@ -74,6 +101,11 @@ namespace ChocolArm64.Translation
                     return false;
                 }
 
+                return Equals(other);
+            }
+
+            public bool Equals(BlockIo other)
+            {
                 return other.Block      == Block      &&
                        other.Entry      == Entry      &&
                        other.IntInputs  == IntInputs  &&
@@ -98,25 +130,13 @@ namespace ChocolArm64.Translation
             }
         }
 
-        private const int MaxOptGraphLength = 40;
-
-        public LocalAlloc(ILBlock[] graph, ILBlock entry)
+        public RegisterUsage()
         {
             _intPaths = new Dictionary<ILBlock, PathIo>();
             _vecPaths = new Dictionary<ILBlock, PathIo>();
-
-            if (graph.Length > 1 &&
-                graph.Length < MaxOptGraphLength)
-            {
-                InitializeOptimal(graph, entry);
-            }
-            else
-            {
-                InitializeFast(graph);
-            }
         }
 
-        private void InitializeOptimal(ILBlock[] graph, ILBlock entry)
+        public void BuildUses(ILBlock entry)
         {
             //This will go through all possible paths on the graph,
             //and store all inputs/outputs for each block. A register
@@ -124,7 +144,7 @@ namespace ChocolArm64.Translation
             //When a block can be reached by more than one path, then the
             //output from all paths needs to be set for this block, and
             //only outputs present in all of the parent blocks can be considered
-            //when doing input elimination. Each block chain have a entry, that's where
+            //when doing input elimination. Each block chain has an entry, that's where
             //the code starts executing. They are present on the subroutine start point,
             //and on call return points too (address written to X30 by BL).
             HashSet<BlockIo> visited = new HashSet<BlockIo>();
@@ -133,19 +153,13 @@ namespace ChocolArm64.Translation
 
             void Enqueue(BlockIo block)
             {
-                if (!visited.Contains(block))
+                if (visited.Add(block))
                 {
                     unvisited.Enqueue(block);
-
-                    visited.Add(block);
                 }
             }
 
-            Enqueue(new BlockIo()
-            {
-                Block = entry,
-                Entry = entry
-            });
+            Enqueue(new BlockIo(entry, entry));
 
             while (unvisited.Count > 0)
             {
@@ -177,19 +191,21 @@ namespace ChocolArm64.Translation
 
                 void EnqueueFromCurrent(ILBlock block, bool retTarget)
                 {
-                    BlockIo blockIo = new BlockIo() { Block = block };
+                    BlockIo blockIo;
 
                     if (retTarget)
                     {
-                        blockIo.Entry = block;
+                        blockIo = new BlockIo(block, block);
                     }
                     else
                     {
-                        blockIo.Entry      = current.Entry;
-                        blockIo.IntInputs  = current.IntInputs;
-                        blockIo.VecInputs  = current.VecInputs;
-                        blockIo.IntOutputs = current.IntOutputs;
-                        blockIo.VecOutputs = current.VecOutputs;
+                        blockIo = new BlockIo(
+                            block,
+                            current.Entry,
+                            current.IntInputs,
+                            current.VecInputs,
+                            current.IntOutputs,
+                            current.VecOutputs);
                     }
 
                     Enqueue(blockIo);
@@ -207,54 +223,63 @@ namespace ChocolArm64.Translation
             }
         }
 
-        private void InitializeFast(ILBlock[] graph)
-        {
-            //This is WAY faster than InitializeOptimal, but results in
-            //unneeded loads and stores, so the resulting code will be slower.
-            long intInputs = 0, intOutputs = 0;
-            long vecInputs = 0, vecOutputs = 0;
+        public long GetIntInputs(ILBlock entry) => GetInputsImpl(entry, _intPaths.Values);
+        public long GetVecInputs(ILBlock entry) => GetInputsImpl(entry, _vecPaths.Values);
 
-            foreach (ILBlock block in graph)
-            {
-                intInputs  |= block.IntInputs;
-                intOutputs |= block.IntOutputs;
-                vecInputs  |= block.VecInputs;
-                vecOutputs |= block.VecOutputs;
-            }
-
-            //It's possible that not all code paths writes to those output registers,
-            //in those cases if we attempt to write an output registers that was
-            //not written, we will be just writing zero and messing up the old register value.
-            //So we just need to ensure that all outputs are loaded.
-            if (graph.Length > 1)
-            {
-                intInputs |= intOutputs;
-                vecInputs |= vecOutputs;
-            }
-
-            foreach (ILBlock block in graph)
-            {
-                _intPaths.Add(block, new PathIo(block, intInputs, intOutputs));
-                _vecPaths.Add(block, new PathIo(block, vecInputs, vecOutputs));
-            }
-        }
-
-        public long GetIntInputs(ILBlock root) => GetInputsImpl(root, _intPaths.Values);
-        public long GetVecInputs(ILBlock root) => GetInputsImpl(root, _vecPaths.Values);
-
-        private long GetInputsImpl(ILBlock root, IEnumerable<PathIo> values)
+        private long GetInputsImpl(ILBlock entry, IEnumerable<PathIo> values)
         {
             long inputs = 0;
 
             foreach (PathIo path in values)
             {
-                inputs |= path.GetInputs(root);
+                inputs |= path.GetInputs(entry);
             }
 
             return inputs;
         }
 
+        public long GetIntNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _intPaths.Values);
+        public long GetVecNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _vecPaths.Values);
+
+        private long GetNotInputsImpl(ILBlock entry, IEnumerable<PathIo> values)
+        {
+            //Computes the mask of registers that get written before
+            //they are read. A bit survives only if every path reports
+            //the register as an output and not as an input for entry.
+            //All bits start set, so no paths at all yields -1.
+            long notInputs = ~0L;
+
+            foreach (PathIo pathIo in values)
+            {
+                notInputs = notInputs & pathIo.GetOutputs() & ~pathIo.GetInputs(entry);
+            }
+
+            return notInputs;
+        }
+
         public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs();
         public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs();
+
+        public static long ClearCallerSavedIntRegs(long mask, bool isAarch64)
+        {
+            //TODO: ARM32 support (the mask is returned as is for now).
+            if (!isAarch64)
+            {
+                return mask;
+            }
+
+            return mask & ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask);
+        }
+
+        public static long ClearCallerSavedVecRegs(long mask, bool isAarch64)
+        {
+            //TODO: ARM32 support (the mask is returned as is for now).
+            if (!isAarch64)
+            {
+                return mask;
+            }
+
+            return mask & ~CallerSavedVecRegistersMask;
+        }
     }
 }
\ No newline at end of file
diff --git a/ChocolArm64/Translation/TranslatedSub.cs b/ChocolArm64/Translation/TranslatedSub.cs
index 65d7035107..8b599b7a93 100644
--- a/ChocolArm64/Translation/TranslatedSub.cs
+++ b/ChocolArm64/Translation/TranslatedSub.cs
@@ -10,21 +10,41 @@ namespace ChocolArm64.Translation
 
     class TranslatedSub
     {
+        //This is the minimum amount of calls needed for the method
+        //to be retranslated with higher quality code. It's only worth
+        //doing that for hot code.
+        private const int MinCallCountForOpt = 30;
+
         public ArmSubroutine Delegate { get; private set; }
 
-        public static int StateArgIdx  { get; private set; }
-        public static int MemoryArgIdx { get; private set; }
+        public static int StateArgIdx  { get; }
+        public static int MemoryArgIdx { get; }
 
-        public static Type[] FixedArgTypes { get; private set; }
+        public static Type[] FixedArgTypes { get; }
 
-        public DynamicMethod Method { get; private set; }
+        public DynamicMethod Method { get; }
 
-        public TranslationTier Tier { get; private set; }
+        public TranslationTier Tier { get; }
 
-        public TranslatedSub(DynamicMethod method, TranslationTier tier)
+        public long IntNiRegsMask { get; }
+        public long VecNiRegsMask { get; }
+
+        private bool _isWorthOptimizing;
+
+        private int _callCount;
+
+        public TranslatedSub(
+            DynamicMethod   method,
+            long            intNiRegsMask,
+            long            vecNiRegsMask,
+            TranslationTier tier,
+            bool            isWorthOptimizing)
         {
-            Method = method ?? throw new ArgumentNullException(nameof(method));;
-            Tier   = tier;
+            Method             = method ?? throw new ArgumentNullException(nameof(method));
+            IntNiRegsMask      = intNiRegsMask;
+            VecNiRegsMask      = vecNiRegsMask;
+            _isWorthOptimizing = isWorthOptimizing;
+            Tier               = tier;
         }
 
         static TranslatedSub()
@@ -61,5 +81,24 @@ namespace ChocolArm64.Translation
         {
             return Delegate(threadState, memory);
         }
+
+        /// <summary>
+        /// Counts a call to this subroutine and tells whether it should now be
+        /// retranslated. Yields true on the first call that comes after
+        /// <see cref="MinCallCountForOpt"/> counted calls, and false afterwards.
+        /// </summary>
+        public bool IsWorthOptimizing()
+        {
+            if (!_isWorthOptimizing || _callCount++ < MinCallCountForOpt)
+            {
+                return false;
+            }
+
+            //Clearing the flag makes every later call return false,
+            //so the subroutine is added to the queue only once.
+            _isWorthOptimizing = false;
+
+            return true;
+        }
     }
 }
\ No newline at end of file
diff --git a/ChocolArm64/Translation/Translator.cs b/ChocolArm64/Translation/Translator.cs
index dd1215f50c..bda0bca09f 100644
--- a/ChocolArm64/Translation/Translator.cs
+++ b/ChocolArm64/Translation/Translator.cs
@@ -63,48 +63,36 @@ namespace ChocolArm64.Translation
                     CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
                 }
 
-                TranslatedSub subroutine = GetOrTranslateSubroutine(state, position);
+                if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
+                {
+                    sub = TranslateLowCq(position, state.GetExecutionMode());
+                }
 
-                position = subroutine.Execute(state, _memory);
+                position = sub.Execute(state, _memory);
             }
             while (position != 0 && state.Running);
 
             state.CurrentTranslator = null;
         }
 
-        internal void TranslateVirtualSubroutine(CpuThreadState state, long position)
-        {
-            if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0)
-            {
-                _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
-            }
-        }
-
-        internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
+        internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
         {
             if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
             {
                 sub = TranslateLowCq(position, state.GetExecutionMode());
             }
 
-            if (sub.Tier == TranslationTier.Tier0)
+            if (sub.IsWorthOptimizing())
             {
-                _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
+                bool isComplete = cs == CallType.Call ||
+                                  cs == CallType.VirtualCall;
+
+                _queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete);
             }
 
             return sub.Delegate;
         }
 
-        internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position)
-        {
-            if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
-            {
-                subroutine = TranslateLowCq(position, state.GetExecutionMode());
-            }
-
-            return subroutine;
-        }
-
         private void TranslateQueuedSubs()
         {
             while (_threadCount != 0)
@@ -124,7 +112,7 @@ namespace ChocolArm64.Translation
                     }
                     else
                     {
-                        TranslateHighCq(item.Position, item.Mode);
+                        TranslateHighCq(item.Position, item.Mode, item.IsComplete);
                     }
                 }
                 else
@@ -142,14 +130,16 @@ namespace ChocolArm64.Translation
 
             string subName = GetSubroutineName(position);
 
-            ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName);
+            bool isAarch64 = mode == ExecutionMode.Aarch64;
 
-            TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);
+            ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
+
+            TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true);
 
             return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
         }
 
-        private void TranslateHighCq(long position, ExecutionMode mode)
+        private TranslatedSub TranslateHighCq(long position, ExecutionMode mode, bool isComplete)
         {
             Block graph = Decoder.DecodeSubroutine(_memory, position, mode);
 
@@ -159,9 +149,13 @@ namespace ChocolArm64.Translation
 
             string subName = GetSubroutineName(position);
 
-            ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName);
+            bool isAarch64 = mode == ExecutionMode.Aarch64;
 
-            TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);
+            isComplete &= !context.HasIndirectJump;
+
+            ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
+
+            TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall);
 
             int ilOpCount = 0;
 
@@ -170,9 +164,11 @@ namespace ChocolArm64.Translation
                 ilOpCount += ilBlock.Count;
             }
 
+            ForceAheadOfTimeCompilation(subroutine);
+
             _cache.AddOrUpdate(position, subroutine, ilOpCount);
 
-            ForceAheadOfTimeCompilation(subroutine);
+            return subroutine;
         }
 
         private string GetSubroutineName(long position)
diff --git a/ChocolArm64/Translation/TranslatorQueue.cs b/ChocolArm64/Translation/TranslatorQueue.cs
index 89d665bfbd..0f1d847470 100644
--- a/ChocolArm64/Translation/TranslatorQueue.cs
+++ b/ChocolArm64/Translation/TranslatorQueue.cs
@@ -1,3 +1,4 @@
+using ChocolArm64.State;
 using System.Collections.Concurrent;
 using System.Threading;
 
@@ -5,10 +6,6 @@ namespace ChocolArm64.Translation
 {
     class TranslatorQueue
     {
-        //This is the maximum number of functions to be translated that the queue can hold.
-        //The value may need some tuning to find the sweet spot.
-        private const int MaxQueueSize = 1024;
-
         private ConcurrentStack<TranslatorQueueItem>[] _translationQueue;
 
         private ManualResetEvent _queueDataReceivedEvent;
@@ -27,14 +24,11 @@ namespace ChocolArm64.Translation
             _queueDataReceivedEvent = new ManualResetEvent(false);
         }
 
-        public void Enqueue(TranslatorQueueItem item)
+        public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete)
         {
-            ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)item.Tier];
+            TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete);
 
-            if (queue.Count >= MaxQueueSize)
-            {
-                queue.TryPop(out _);
-            }
+            ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)tier];
 
             queue.Push(item);
 
diff --git a/ChocolArm64/Translation/TranslatorQueueItem.cs b/ChocolArm64/Translation/TranslatorQueueItem.cs
index 0988414a50..dde2706d98 100644
--- a/ChocolArm64/Translation/TranslatorQueueItem.cs
+++ b/ChocolArm64/Translation/TranslatorQueueItem.cs
@@ -10,11 +10,18 @@ namespace ChocolArm64.Translation
 
         public TranslationTier Tier { get; }
 
-        public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier)
+        public bool IsComplete { get; }
+
+        public TranslatorQueueItem(
+            long            position,
+            ExecutionMode   mode,
+            TranslationTier tier,
+            bool            isComplete = false)
         {
-            Position = position;
-            Mode     = mode;
-            Tier     = tier;
+            Position   = position;
+            Mode       = mode;
+            Tier       = tier;
+            IsComplete = isComplete;
         }
     }
 }
\ No newline at end of file
diff --git a/ChocolArm64/Translation/IoType.cs b/ChocolArm64/Translation/VarType.cs
similarity index 85%
rename from ChocolArm64/Translation/IoType.cs
rename to ChocolArm64/Translation/VarType.cs
index c7710e0c67..d671575e98 100644
--- a/ChocolArm64/Translation/IoType.cs
+++ b/ChocolArm64/Translation/VarType.cs
@@ -1,6 +1,6 @@
 namespace ChocolArm64.Translation
 {
-    enum IoType
+    enum VarType
     {
         Arg,
         Flag,
diff --git a/Ryujinx/Config.jsonc b/Ryujinx/Config.jsonc
index 8b5ebe0328..6e808b56fd 100644
--- a/Ryujinx/Config.jsonc
+++ b/Ryujinx/Config.jsonc
@@ -29,18 +29,21 @@
     // System Language list: https://gist.github.com/HorrorTroll/b6e4a88d774c3c9b3bdf54d79a7ca43b
     "system_language": "AmericanEnglish",
 
-    // Enable or Disable Docked Mode
+    // Enable or disable Docked Mode
     "docked_mode": false,
-    
-    // Enable or Disable Game Vsync
+
+    // Enable or disable Game Vsync
     "enable_vsync": true,
-    
-    // Enable or Disable Multi-core scheduling of threads
+
+    // Enable or disable Multi-core scheduling of threads
     "enable_multicore_scheduling": true,
-    
+
     // Enable integrity checks on Switch content files
     "enable_fs_integrity_checks": true,
-    
+
+    // Enable or disable aggressive CPU optimizations
+    "enable_aggressive_cpu_opts": true,
+
     // The primary controller's type
     // Supported Values: Handheld, ProController, NpadPair, NpadLeft, NpadRight
     "controller_type": "Handheld",
diff --git a/Ryujinx/Configuration.cs b/Ryujinx/Configuration.cs
index dbbec1cbc6..c4a1b4369f 100644
--- a/Ryujinx/Configuration.cs
+++ b/Ryujinx/Configuration.cs
@@ -86,6 +86,11 @@ namespace Ryujinx
         /// </summary>
         public bool EnableFsIntegrityChecks { get; private set; }
 
+        /// <summary>
+        /// Enable or disable aggressive CPU optimizations
+        /// </summary>
+        public bool EnableAggressiveCpuOpts { get; private set; }
+
         /// <summary>
         ///  The primary controller's type
         /// </summary>
@@ -197,6 +202,11 @@ namespace Ryujinx
                 ? IntegrityCheckLevel.ErrorOnInvalid
                 : IntegrityCheckLevel.None;
 
+            if (Instance.EnableAggressiveCpuOpts)
+            {
+                Optimizations.AssumeStrictAbiCompliance = true;
+            }
+
             if(Instance.GamepadControls.Enabled)
             {
                 if (GamePad.GetName(Instance.GamepadControls.Index) == "Unmapped Controller")
diff --git a/Ryujinx/_schema.json b/Ryujinx/_schema.json
index 0e586671d6..7e7e466594 100644
--- a/Ryujinx/_schema.json
+++ b/Ryujinx/_schema.json
@@ -17,6 +17,7 @@
     "enable_vsync",
     "enable_multicore_scheduling",
     "enable_fs_integrity_checks",
+    "enable_aggressive_cpu_opts",
     "controller_type",
     "keyboard_controls",
     "gamepad_controls"
@@ -399,6 +400,17 @@
         false
       ]
     },
+    "enable_aggressive_cpu_opts": {
+      "$id": "#/properties/enable_aggressive_cpu_opts",
+      "type": "boolean",
+      "title": "Enable Aggressive CPU Optimizations",
+      "description": "Enable or disable aggressive CPU optimizations",
+      "default": true,
+      "examples": [
+        true,
+        false
+      ]
+    },
     "controller_type": {
       "$id": "#/properties/controller_type",
       "type": "string",