diff --git a/ARMeilleure/ARMeilleure.csproj b/ARMeilleure/ARMeilleure.csproj
index bb3f47219a..58fd04b38b 100644
--- a/ARMeilleure/ARMeilleure.csproj
+++ b/ARMeilleure/ARMeilleure.csproj
@@ -9,4 +9,11 @@
     <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
   </ItemGroup>
 
+  <ItemGroup>
+    <ContentWithTargetPath Include="Native\libs\libarmeilleure-jitsupport.dylib" Condition="'$(RuntimeIdentifier)' == '' OR '$(RuntimeIdentifier)' == 'osx-arm64'">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+      <TargetPath>libarmeilleure-jitsupport.dylib</TargetPath>
+    </ContentWithTargetPath>
+  </ItemGroup>
+
 </Project>
diff --git a/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs b/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
new file mode 100644
index 0000000000..fdd4d02414
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
@@ -0,0 +1,270 @@
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    static class Arm64Optimizer
+    {
+        private const int MaxConstantUses = 10000;
+
+        public static void RunPass(ControlFlowGraph cfg)
+        {
+            var constants = new Dictionary<ulong, Operand>();
+
+            Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
+            {
+                // If the constant already has many uses, we also force a new constant mov to be
+                // added, to avoid overflowing the use count field (which is limited to 16 bits).
+                if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses)
+                {
+                    constant = Local(source.Type);
+
+                    Operation copyOp = Operation(Instruction.Copy, constant, source);
+
+                    block.Operations.AddBefore(operation, copyOp);
+
+                    constants[source.Value] = constant;
+                }
+
+                return constant;
+            }
+
+            for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+            {
+                constants.Clear();
+
+                Operation nextNode;
+
+                for (Operation node = block.Operations.First; node != default; node = nextNode)
+                {
+                    nextNode = node.ListNext;
+
+                    // Insert copies for constants that can't fit on a 32-bit immediate.
+                    // Doing this early unblocks a few optimizations.
+                    if (node.Instruction == Instruction.Add)
+                    {
+                        Operand src1 = node.GetSource(0);
+                        Operand src2 = node.GetSource(1);
+
+                        if (src1.Kind == OperandKind.Constant && (src1.Relocatable || ConstTooLong(src1, OperandType.I32)))
+                        {
+                            node.SetSource(0, GetConstantCopy(block, node, src1));
+                        }
+
+                        if (src2.Kind == OperandKind.Constant && (src2.Relocatable || ConstTooLong(src2, OperandType.I32)))
+                        {
+                            node.SetSource(1, GetConstantCopy(block, node, src2));
+                        }
+                    }
+
+                    // Try to fold something like:
+                    //  lsl x1, x1, #2
+                    //  add x0, x0, x1
+                    //  ldr x0, [x0]
+                    //  add x2, x2, #16
+                    //  ldr x2, [x2]
+                    // Into:
+                    //  ldr x0, [x0, x1, lsl #2]
+                    //  ldr x2, [x2, #16]
+                    if (IsMemoryLoadOrStore(node.Instruction))
+                    {
+                        OperandType type;
+
+                        if (node.Destination != default)
+                        {
+                            type = node.Destination.Type;
+                        }
+                        else
+                        {
+                            type = node.GetSource(1).Type;
+                        }
+
+                        Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);
+
+                        if (memOp != default)
+                        {
+                            node.SetSource(0, memOp);
+                        }
+                    }
+                }
+            }
+
+            Optimizer.RemoveUnusedNodes(cfg);
+        }
+
+        private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
+        {
+            Operand baseOp = addr;
+
+            // First we check if the address is the result of adding an immediate to a local X.
+            // If so, baseOp becomes X and the memory operand immediate becomes the addition
+            // immediate. Otherwise, baseOp remains the address.
+            int imm = GetConstOp(ref baseOp, type);
+            if (imm != 0)
+            {
+                return MemoryOp(type, baseOp, default, Multiplier.x1, imm);
+            }
+
+            // Now we check if baseOp is the result of adding two locals Y and Z. If so, we set
+            // baseOp to Y and indexOp to Z. We further check if Z is the result of a left shift
+            // of a local W by 0 or by Log2(AccessSize); if it is, we set indexOp to W and adjust
+            // the scale of the memory operand to match the shift amount.
+            // One case is missed: the address itself being a shift result. This is probably not
+            // worth optimizing, as it should never happen.
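+            // For example, assuming addr = Y + (W << 3) with an I64 access: baseOp becomes Y,
+            // indexOp becomes W and the scale becomes x8, which is equivalent to the
+            // [Y, W, LSL #3] addressing mode.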
+            (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp, type);
+
+            // If baseOp is still equal to address, then there's nothing that can be optimized.
+            if (baseOp == addr)
+            {
+                return default;
+            }
+
+            return MemoryOp(type, baseOp, indexOp, scale, 0);
+        }
+
+        private static int GetConstOp(ref Operand baseOp, OperandType accessType)
+        {
+            Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+            if (operation == default)
+            {
+                return 0;
+            }
+
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            Operand constOp;
+            Operand otherOp;
+
+            if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable)
+            {
+                constOp = src1;
+                otherOp = src2;
+            }
+            else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant)
+            {
+                constOp = src2;
+                otherOp = src1;
+            }
+            else
+            {
+                return 0;
+            }
+
+            // If we have addition by a constant that we can't encode on the instruction,
+            // then we can't optimize it further.
+            if (ConstTooLong(constOp, accessType))
+            {
+                return 0;
+            }
+
+            baseOp = otherOp;
+
+            return constOp.AsInt32();
+        }
+
+        private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType)
+        {
+            Operand indexOp = default;
+
+            Multiplier scale = Multiplier.x1;
+
+            Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+            if (addOp == default)
+            {
+                return (indexOp, scale);
+            }
+
+            Operand src1 = addOp.GetSource(0);
+            Operand src2 = addOp.GetSource(1);
+
+            if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable)
+            {
+                return (indexOp, scale);
+            }
+
+            baseOp = src1;
+            indexOp = src2;
+
+            Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft);
+
+            bool indexOnSrc2 = false;
+
+            if (shlOp == default)
+            {
+                shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft);
+
+                indexOnSrc2 = true;
+            }
+
+            if (shlOp != default)
+            {
+                Operand shSrc = shlOp.GetSource(0);
+                Operand shift = shlOp.GetSource(1);
+
+                int maxShift = Assembler.GetScaleForType(accessType);
+
+                if (shSrc.Kind == OperandKind.LocalVariable &&
+                    shift.Kind == OperandKind.Constant &&
+                    (shift.Value == 0 || shift.Value == (ulong)maxShift))
+                {
+                    scale = shift.Value switch
+                    {
+                        1 => Multiplier.x2,
+                        2 => Multiplier.x4,
+                        3 => Multiplier.x8,
+                        4 => Multiplier.x16,
+                        _ => Multiplier.x1
+                    };
+
+                    baseOp = indexOnSrc2 ? src1 : src2;
+                    indexOp = shSrc;
+                }
+            }
+
+            return (indexOp, scale);
+        }
+
+        private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
+        {
+            // If we have multiple assignments, folding is not safe
+            // as the value may be different depending on the
+            // control flow path.
+            if (op.AssignmentsCount != 1)
+            {
+                return default;
+            }
+
+            Operation asgOp = op.Assignments[0];
+
+            if (asgOp.Instruction != inst)
+            {
+                return default;
+            }
+
+            return asgOp;
+        }
+
+        private static bool IsMemoryLoadOrStore(Instruction inst)
+        {
+            return inst == Instruction.Load || inst == Instruction.Store;
+        }
+
+        private static bool ConstTooLong(Operand constOp, OperandType accessType)
+        {
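+            // The immediate is usable only if it fits in 32 bits and can be encoded as a
+            // scaled unsigned 12-bit offset for the given access type.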
+            if ((uint)constOp.Value != constOp.Value)
+            {
+                return true;
+            }
+
+            return !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType);
+        }
+    }
+}
diff --git a/ARMeilleure/CodeGen/Arm64/ArmCondition.cs b/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
new file mode 100644
index 0000000000..db27a81044
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
@@ -0,0 +1,47 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    enum ArmCondition
+    {
+        Eq   = 0,
+        Ne   = 1,
+        GeUn = 2,
+        LtUn = 3,
+        Mi   = 4,
+        Pl   = 5,
+        Vs   = 6,
+        Vc   = 7,
+        GtUn = 8,
+        LeUn = 9,
+        Ge   = 10,
+        Lt   = 11,
+        Gt   = 12,
+        Le   = 13,
+        Al   = 14,
+        Nv   = 15
+    }
+
+    static class ComparisonArm64Extensions
+    {
+        public static ArmCondition ToArmCondition(this Comparison comp)
+        {
+            return comp switch
+            {
+                Comparison.Equal            => ArmCondition.Eq,
+                Comparison.NotEqual         => ArmCondition.Ne,
+                Comparison.Greater          => ArmCondition.Gt,
+                Comparison.LessOrEqual      => ArmCondition.Le,
+                Comparison.GreaterUI        => ArmCondition.GtUn,
+                Comparison.LessOrEqualUI    => ArmCondition.LeUn,
+                Comparison.GreaterOrEqual   => ArmCondition.Ge,
+                Comparison.Less             => ArmCondition.Lt,
+                Comparison.GreaterOrEqualUI => ArmCondition.GeUn,
+                Comparison.LessUI           => ArmCondition.LtUn,
+
+                _ => throw new ArgumentException(null, nameof(comp))
+            };
+        }
+    }
+}
diff --git a/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs b/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
new file mode 100644
index 0000000000..062a6d0b7b
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+    enum ArmExtensionType
+    {
+        Uxtb = 0,
+        Uxth = 1,
+        Uxtw = 2,
+        Uxtx = 3,
+        Sxtb = 4,
+        Sxth = 5,
+        Sxtw = 6,
+        Sxtx = 7
+    }
+}
diff --git a/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs b/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
new file mode 100644
index 0000000000..d223a1464c
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
@@ -0,0 +1,11 @@
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    enum ArmShiftType
+    {
+        Lsl = 0,
+        Lsr = 1,
+        Asr = 2,
+        Ror = 3
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/Assembler.cs b/ARMeilleure/CodeGen/Arm64/Assembler.cs
new file mode 100644
index 0000000000..0ec0be7cb4
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/Assembler.cs
@@ -0,0 +1,1160 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+using System.IO;
+using static ARMeilleure.IntermediateRepresentation.Operand;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    class Assembler
+    {
+        public const uint SfFlag = 1u << 31;
+
+        private const int SpRegister = 31;
+        private const int ZrRegister = 31;
+
+        private readonly Stream _stream;
+
+        public Assembler(Stream stream)
+        {
+            _stream = stream;
+        }
+
+        public void Add(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+        {
+            WriteInstructionAuto(0x0b200000u, rd, rn, rm, extensionType, shiftAmount);
+        }
+
+        public void Add(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0, bool immForm = false)
+        {
+            WriteInstructionAuto(0x11000000u, 0x0b000000u, rd, rn, rm, shiftType, shiftAmount, immForm);
+        }
+
+        public void And(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            WriteInstructionBitwiseAuto(0x12000000u, 0x0a000000u, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        public void Ands(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            WriteInstructionBitwiseAuto(0x72000000u, 0x6a000000u, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        public void Asr(Operand rd, Operand rn, Operand rm)
+        {
+            if (rm.Kind == OperandKind.Constant)
+            {
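+                // ASR #n is an alias of SBFM rd, rn, #n, #(datasize - 1).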
+                int shift = rm.AsInt32();
+                int mask = rd.Type == OperandType.I64 ? 63 : 31;
+                shift &= mask;
+                Sbfm(rd, rn, shift, mask);
+            }
+            else
+            {
+                Asrv(rd, rn, rm);
+            }
+        }
+
+        public void Asrv(Operand rd, Operand rn, Operand rm)
+        {
+            WriteInstructionBitwiseAuto(0x1ac02800u, rd, rn, rm);
+        }
+
+        public void B(int imm)
+        {
+            WriteUInt32(0x14000000u | EncodeSImm26_2(imm));
+        }
+
+        public void B(ArmCondition condition, int imm)
+        {
+            WriteUInt32(0x54000000u | (uint)condition | (EncodeSImm19_2(imm) << 5));
+        }
+
+        public void Blr(Operand rn)
+        {
+            WriteUInt32(0xd63f0000u | (EncodeReg(rn) << 5));
+        }
+
+        public void Br(Operand rn)
+        {
+            WriteUInt32(0xd61f0000u | (EncodeReg(rn) << 5));
+        }
+
+        public void Brk()
+        {
+            WriteUInt32(0xd4200000u);
+        }
+
+        public void Cbz(Operand rt, int imm)
+        {
+            WriteInstructionAuto(0x34000000u | (EncodeSImm19_2(imm) << 5), rt);
+        }
+
+        public void Cbnz(Operand rt, int imm)
+        {
+            WriteInstructionAuto(0x35000000u | (EncodeSImm19_2(imm) << 5), rt);
+        }
+
+        public void Clrex(int crm = 15)
+        {
+            WriteUInt32(0xd503305fu | (EncodeUImm4(crm) << 8));
+        }
+
+        public void Clz(Operand rd, Operand rn)
+        {
+            WriteInstructionAuto(0x5ac01000u, rd, rn);
+        }
+
+        public void CmeqVector(Operand rd, Operand rn, Operand rm, int size, bool q = true)
+        {
+            Debug.Assert((uint)size < 4);
+            WriteSimdInstruction(0x2e208c00u | ((uint)size << 22), rd, rn, rm, q);
+        }
+
+        public void Cmp(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            Subs(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount);
+        }
+
+        public void Csel(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+        {
+            WriteInstructionBitwiseAuto(0x1a800000u | ((uint)condition << 12), rd, rn, rm);
+        }
+
+        public void Cset(Operand rd, ArmCondition condition)
+        {
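+            // CSET is an alias of CSINC with both sources set to ZR and the condition inverted;
+            // flipping the low bit of a condition code yields its inverse (except for AL/NV,
+            // which are not meaningful here).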
+            var zr = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type);
+            Csinc(rd, zr, zr, (ArmCondition)((int)condition ^ 1));
+        }
+
+        public void Csinc(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+        {
+            WriteInstructionBitwiseAuto(0x1a800400u | ((uint)condition << 12), rd, rn, rm);
+        }
+
+        public void Dmb(uint option)
+        {
+            WriteUInt32(0xd50330bfu | (option << 8));
+        }
+
+        public void DupScalar(Operand rd, Operand rn, int index, int size)
+        {
+            WriteInstruction(0x5e000400u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn);
+        }
+
+        public void Eor(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            WriteInstructionBitwiseAuto(0x52000000u, 0x4a000000u, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        public void EorVector(Operand rd, Operand rn, Operand rm, bool q = true)
+        {
+            WriteSimdInstruction(0x2e201c00u, rd, rn, rm, q);
+        }
+
+        public void Extr(Operand rd, Operand rn, Operand rm, int imms)
+        {
+            uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+            WriteInstructionBitwiseAuto(0x13800000u | n | (EncodeUImm6(imms) << 10), rd, rn, rm);
+        }
+
+        public void FaddScalar(Operand rd, Operand rn, Operand rm)
+        {
+            WriteFPInstructionAuto(0x1e202800u, rd, rn, rm);
+        }
+
+        public void FcvtScalar(Operand rd, Operand rn)
+        {
+            uint instruction = 0x1e224000u | (rd.Type == OperandType.FP64 ? 1u << 15 : 1u << 22);
+            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+        }
+
+        public void FdivScalar(Operand rd, Operand rn, Operand rm)
+        {
+            WriteFPInstructionAuto(0x1e201800u, rd, rn, rm);
+        }
+
+        public void Fmov(Operand rd, Operand rn)
+        {
+            WriteFPInstructionAuto(0x1e204000u, rd, rn);
+        }
+
+        public void Fmov(Operand rd, Operand rn, bool topHalf)
+        {
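+            // FMOV (general): moves between an integer register and an FP/SIMD register without
+            // conversion. topHalf selects the upper 64 bits of a 128-bit vector (rmode = 1).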
+            Debug.Assert(rd.Type.IsInteger() != rn.Type.IsInteger());
+            Debug.Assert(rd.Type == OperandType.I64 || rn.Type == OperandType.I64 || !topHalf);
+
+            uint opcode = rd.Type.IsInteger() ? 0b110u : 0b111u;
+
+            uint rmode = topHalf ? 1u << 19 : 0u;
+            uint ftype = rd.Type == OperandType.FP64 || rn.Type == OperandType.FP64 ? 1u << 22 : 0u;
+            uint sf    = rd.Type == OperandType.I64  || rn.Type == OperandType.I64  ? SfFlag   : 0u;
+
+            WriteUInt32(0x1e260000u | (opcode << 16) | rmode | ftype | sf | EncodeReg(rd) | (EncodeReg(rn) << 5));
+        }
+
+        public void FmulScalar(Operand rd, Operand rn, Operand rm)
+        {
+            WriteFPInstructionAuto(0x1e200800u, rd, rn, rm);
+        }
+
+        public void FnegScalar(Operand rd, Operand rn)
+        {
+            WriteFPInstructionAuto(0x1e214000u, rd, rn);
+        }
+
+        public void FsubScalar(Operand rd, Operand rn, Operand rm)
+        {
+            WriteFPInstructionAuto(0x1e203800u, rd, rn, rm);
+        }
+
+        public void Ins(Operand rd, Operand rn, int index, int size)
+        {
+            WriteInstruction(0x4e001c00u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn);
+        }
+
+        public void Ins(Operand rd, Operand rn, int srcIndex, int dstIndex, int size)
+        {
+            uint imm4 = (uint)srcIndex << size;
+            Debug.Assert((uint)srcIndex < (16u >> size));
+            WriteInstruction(0x6e000400u | (imm4 << 11) | (EncodeIndexSizeImm5(dstIndex, size) << 16), rd, rn);
+        }
+
+        public void Ldaxp(Operand rt, Operand rt2, Operand rn)
+        {
+            WriteInstruction(0x887f8000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rt2);
+        }
+
+        public void Ldaxr(Operand rt, Operand rn)
+        {
+            WriteInstruction(0x085ffc00u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn);
+        }
+
+        public void Ldaxrb(Operand rt, Operand rn)
+        {
+            WriteInstruction(0x085ffc00u, rt, rn);
+        }
+
+        public void Ldaxrh(Operand rt, Operand rn)
+        {
+            WriteInstruction(0x085ffc00u | (1u << 30), rt, rn);
+        }
+
+        public void LdpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+        {
+            uint instruction = GetLdpStpInstruction(0x28c00000u, 0x2cc00000u, imm, rt.Type);
+            WriteInstruction(instruction, rt, rn, rt2);
+        }
+
+        public void LdpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+        {
+            uint instruction = GetLdpStpInstruction(0x29c00000u, 0x2dc00000u, imm, rt.Type);
+            WriteInstruction(instruction, rt, rn, rt2);
+        }
+
+        public void LdpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+        {
+            uint instruction = GetLdpStpInstruction(0x29400000u, 0x2d400000u, imm, rt.Type);
+            WriteInstruction(instruction, rt, rn, rt2);
+        }
+
+        public void Ldr(Operand rt, Operand rn)
+        {
+            if (rn.Kind == OperandKind.Memory)
+            {
+                MemoryOperand memOp = rn.GetMemory();
+
+                if (memOp.Index != default)
+                {
+                    Debug.Assert(memOp.Displacement == 0);
+                    Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type));
+                    LdrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1);
+                }
+                else
+                {
+                    LdrRiUn(rt, memOp.BaseAddress, memOp.Displacement);
+                }
+            }
+            else
+            {
+                LdrRiUn(rt, rn, 0);
+            }
+        }
+
+        public void LdrLit(Operand rt, int offset)
+        {
+            uint instruction = 0x18000000u | (EncodeSImm19_2(offset) << 5);
+
+            if (rt.Type == OperandType.I64)
+            {
+                instruction |= 1u << 30;
+            }
+
+            WriteInstruction(instruction, rt);
+        }
+
+        public void LdrRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = GetLdrStrInstruction(0xb8400400u, 0x3c400400u, rt.Type) | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void LdrRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = GetLdrStrInstruction(0xb8400c00u, 0x3c400c00u, rt.Type) | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void LdrRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = GetLdrStrInstruction(0xb9400000u, 0x3d400000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void LdrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+        {
+            uint instruction = GetLdrStrInstruction(0xb8600800u, 0x3ce00800u, rt.Type);
+            WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift);
+        }
+
+        public void LdrbRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x38400400u | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void LdrbRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x38400c00u | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void LdrbRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x39400000u | (EncodeUImm12(imm, 0) << 10);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void LdrhRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x78400400u | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void LdrhRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x78400c00u | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void LdrhRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x79400000u | (EncodeUImm12(imm, 1) << 10);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void Ldur(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = GetLdrStrInstruction(0xb8400000u, 0x3c400000u, rt.Type) | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void Lsl(Operand rd, Operand rn, Operand rm)
+        {
+            if (rm.Kind == OperandKind.Constant)
+            {
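+                // LSL #n is an alias of UBFM rd, rn, #((-n) & mask), #(mask - n),
+                // where mask is datasize - 1.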
+                int shift = rm.AsInt32();
+                int mask = rd.Type == OperandType.I64 ? 63 : 31;
+                shift &= mask;
+                Ubfm(rd, rn, -shift & mask, mask - shift);
+            }
+            else
+            {
+                Lslv(rd, rn, rm);
+            }
+        }
+
+        public void Lslv(Operand rd, Operand rn, Operand rm)
+        {
+            WriteInstructionBitwiseAuto(0x1ac02000u, rd, rn, rm);
+        }
+
+        public void Lsr(Operand rd, Operand rn, Operand rm)
+        {
+            if (rm.Kind == OperandKind.Constant)
+            {
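+                // LSR #n is an alias of UBFM rd, rn, #n, #(datasize - 1).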
+                int shift = rm.AsInt32();
+                int mask = rd.Type == OperandType.I64 ? 63 : 31;
+                shift &= mask;
+                Ubfm(rd, rn, shift, mask);
+            }
+            else
+            {
+                Lsrv(rd, rn, rm);
+            }
+        }
+
+        public void Lsrv(Operand rd, Operand rn, Operand rm)
+        {
+            WriteInstructionBitwiseAuto(0x1ac02400u, rd, rn, rm);
+        }
+
+        public void Madd(Operand rd, Operand rn, Operand rm, Operand ra)
+        {
+            WriteInstructionAuto(0x1b000000u, rd, rn, rm, ra);
+        }
+
+        public void Mul(Operand rd, Operand rn, Operand rm)
+        {
+            Madd(rd, rn, rm, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type));
+        }
+
+        public void Mov(Operand rd, Operand rn)
+        {
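+            // MOV (register) is an alias of ORR rd, zr, rm; the vector form is an alias of
+            // ORR with rn == rm. Register 31 encodes ZR here, so use MovSp when SP is involved.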
+            if (rd.Type.IsInteger())
+            {
+                Orr(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn);
+            }
+            else
+            {
+                OrrVector(rd, rn, rn);
+            }
+        }
+
+        public void MovSp(Operand rd, Operand rn)
+        {
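+            // Moves involving SP must use the immediate form of ADD, since the register form
+            // of MOV (an alias of ORR with ZR) would interpret register 31 as ZR, not SP.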
+            if (rd.GetRegister().Index == SpRegister ||
+                rn.GetRegister().Index == SpRegister)
+            {
+                Add(rd, rn, Factory.Const(rd.Type, 0), immForm: true);
+            }
+            else
+            {
+                Mov(rd, rn);
+            }
+        }
+
+        public void Mov(Operand rd, int imm)
+        {
+            Movz(rd, imm, 0);
+        }
+
+        public void Movz(Operand rd, int imm, int hw)
+        {
+            Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+            WriteInstructionAuto(0x52800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd);
+        }
+
+        public void Movk(Operand rd, int imm, int hw)
+        {
+            Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+            WriteInstructionAuto(0x72800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd);
+        }
+
+        public void Mrs(Operand rt, uint o0, uint op1, uint crn, uint crm, uint op2)
+        {
+            uint instruction = 0xd5300000u;
+
+            instruction |= (op2 & 7) << 5;
+            instruction |= (crm & 15) << 8;
+            instruction |= (crn & 15) << 12;
+            instruction |= (op1 & 7) << 16;
+            instruction |= (o0 & 1) << 19;
+
+            WriteInstruction(instruction, rt);
+        }
+
+        public void Mvn(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            Orn(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount);
+        }
+
+        public void Neg(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            Sub(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount);
+        }
+
+        public void Orn(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            WriteInstructionBitwiseAuto(0x2a200000u, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        public void Orr(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            WriteInstructionBitwiseAuto(0x32000000u, 0x2a000000u, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        public void OrrVector(Operand rd, Operand rn, Operand rm, bool q = true)
+        {
+            WriteSimdInstruction(0x0ea01c00u, rd, rn, rm, q);
+        }
+
+        public void Ret(Operand rn)
+        {
+            WriteUInt32(0xd65f0000u | (EncodeReg(rn) << 5));
+        }
+
+        public void Rev(Operand rd, Operand rn)
+        {
+            uint opc0 = rd.Type == OperandType.I64 ? 1u << 10 : 0u;
+            WriteInstructionAuto(0x5ac00800u | opc0, rd, rn);
+        }
+
+        public void Ror(Operand rd, Operand rn, Operand rm)
+        {
+            if (rm.Kind == OperandKind.Constant)
+            {
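+                // ROR #n is an alias of EXTR rd, rn, rn, #n.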
+                int shift = rm.AsInt32();
+                int mask = rd.Type == OperandType.I64 ? 63 : 31;
+                shift &= mask;
+                Extr(rd, rn, rn, shift);
+            }
+            else
+            {
+                Rorv(rd, rn, rm);
+            }
+        }
+
+        public void Rorv(Operand rd, Operand rn, Operand rm)
+        {
+            WriteInstructionBitwiseAuto(0x1ac02c00u, rd, rn, rm);
+        }
+
+        public void Sbfm(Operand rd, Operand rn, int immr, int imms)
+        {
+            uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+            WriteInstructionAuto(0x13000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn);
+        }
+
+        public void ScvtfScalar(Operand rd, Operand rn)
+        {
+            uint instruction = 0x1e220000u;
+
+            if (rn.Type == OperandType.I64)
+            {
+                instruction |= SfFlag;
+            }
+
+            WriteFPInstructionAuto(instruction, rd, rn);
+        }
+
+        public void Sdiv(Operand rd, Operand rn, Operand rm)
+        {
+            WriteInstructionRm16Auto(0x1ac00c00u, rd, rn, rm);
+        }
+
+        public void Smulh(Operand rd, Operand rn, Operand rm)
+        {
+            WriteInstructionRm16(0x9b407c00u, rd, rn, rm);
+        }
+
+        public void Stlxp(Operand rt, Operand rt2, Operand rn, Operand rs)
+        {
+            WriteInstruction(0x88208000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs, rt2);
+        }
+
+        public void Stlxr(Operand rt, Operand rn, Operand rs)
+        {
+            WriteInstructionRm16(0x0800fc00u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs);
+        }
+
+        public void Stlxrb(Operand rt, Operand rn, Operand rs)
+        {
+            WriteInstructionRm16(0x0800fc00u, rt, rn, rs);
+        }
+
+        public void Stlxrh(Operand rt, Operand rn, Operand rs)
+        {
+            WriteInstructionRm16(0x0800fc00u | (1u << 30), rt, rn, rs);
+        }
+
+        public void StpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+        {
+            uint instruction = GetLdpStpInstruction(0x28800000u, 0x2c800000u, imm, rt.Type);
+            WriteInstruction(instruction, rt, rn, rt2);
+        }
+
+        public void StpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+        {
+            uint instruction = GetLdpStpInstruction(0x29800000u, 0x2d800000u, imm, rt.Type);
+            WriteInstruction(instruction, rt, rn, rt2);
+        }
+
+        public void StpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+        {
+            uint instruction = GetLdpStpInstruction(0x29000000u, 0x2d000000u, imm, rt.Type);
+            WriteInstruction(instruction, rt, rn, rt2);
+        }
+
+        public void Str(Operand rt, Operand rn)
+        {
+            if (rn.Kind == OperandKind.Memory)
+            {
+                MemoryOperand memOp = rn.GetMemory();
+
+                if (memOp.Index != default)
+                {
+                    Debug.Assert(memOp.Displacement == 0);
+                    Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type));
+                    StrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1);
+                }
+                else
+                {
+                    StrRiUn(rt, memOp.BaseAddress, memOp.Displacement);
+                }
+            }
+            else
+            {
+                StrRiUn(rt, rn, 0);
+            }
+        }
+
+        public void StrRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = GetLdrStrInstruction(0xb8000400u, 0x3c000400u, rt.Type) | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void StrRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = GetLdrStrInstruction(0xb8000c00u, 0x3c000c00u, rt.Type) | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void StrRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = GetLdrStrInstruction(0xb9000000u, 0x3d000000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void StrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+        {
+            uint instruction = GetLdrStrInstruction(0xb8200800u, 0x3ca00800u, rt.Type);
+            WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift);
+        }
+
+        public void StrbRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x38000400u | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void StrbRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x38000c00u | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void StrbRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x39000000u | (EncodeUImm12(imm, 0) << 10);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void StrhRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x78000400u | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void StrhRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x78000c00u | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void StrhRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = 0x79000000u | (EncodeUImm12(imm, 1) << 10);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void Stur(Operand rt, Operand rn, int imm)
+        {
+            uint instruction = GetLdrStrInstruction(0xb8000000u, 0x3c000000u, rt.Type) | (EncodeSImm9(imm) << 12);
+            WriteInstruction(instruction, rt, rn);
+        }
+
+        public void Sub(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+        {
+            WriteInstructionAuto(0x4b200000u, rd, rn, rm, extensionType, shiftAmount);
+        }
+
+        public void Sub(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            WriteInstructionAuto(0x51000000u, 0x4b000000u, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        public void Subs(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            WriteInstructionAuto(0x71000000u, 0x6b000000u, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        public void Sxtb(Operand rd, Operand rn)
+        {
+            Sbfm(rd, rn, 0, 7);
+        }
+
+        public void Sxth(Operand rd, Operand rn)
+        {
+            Sbfm(rd, rn, 0, 15);
+        }
+
+        public void Sxtw(Operand rd, Operand rn)
+        {
+            Sbfm(rd, rn, 0, 31);
+        }
+
+        public void Tst(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            Ands(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount);
+        }
+
+        public void Ubfm(Operand rd, Operand rn, int immr, int imms)
+        {
+            uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+            WriteInstructionAuto(0x53000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn);
+        }
+
+        public void UcvtfScalar(Operand rd, Operand rn)
+        {
+            uint instruction = 0x1e230000u;
+
+            if (rn.Type == OperandType.I64)
+            {
+                instruction |= SfFlag;
+            }
+
+            WriteFPInstructionAuto(instruction, rd, rn);
+        }
+
+        public void Udiv(Operand rd, Operand rn, Operand rm)
+        {
+            WriteInstructionRm16Auto(0x1ac00800u, rd, rn, rm);
+        }
+
+        public void Umov(Operand rd, Operand rn, int index, int size)
+        {
+            uint q = size == 3 ? 1u << 30 : 0u;
+            WriteInstruction(0x0e003c00u | (EncodeIndexSizeImm5(index, size) << 16) | q, rd, rn);
+        }
+
+        public void Umulh(Operand rd, Operand rn, Operand rm)
+        {
+            WriteInstructionRm16(0x9bc07c00u, rd, rn, rm);
+        }
+
+        public void Uxtb(Operand rd, Operand rn)
+        {
+            Ubfm(rd, rn, 0, 7);
+        }
+
+        public void Uxth(Operand rd, Operand rn)
+        {
+            Ubfm(rd, rn, 0, 15);
+        }
+
+        private void WriteInstructionAuto(
+            uint instI,
+            uint instR,
+            Operand rd,
+            Operand rn,
+            Operand rm,
+            ArmShiftType shiftType = ArmShiftType.Lsl,
+            int shiftAmount = 0,
+            bool immForm = false)
+        {
+            if (rm.Kind == OperandKind.Constant && (rm.Value != 0 || immForm))
+            {
+                Debug.Assert(shiftAmount == 0);
+                int imm = rm.AsInt32();
+                Debug.Assert((uint)imm == rm.Value);
+                if (imm != 0 && (imm & 0xfff) == 0)
+                {
+                    instI |= 1 << 22; // sh flag: use the LSL #12 shifted immediate form.
+                    imm >>= 12;
+                }
+                WriteInstructionAuto(instI | (EncodeUImm12(imm, 0) << 10), rd, rn);
+            }
+            else
+            {
+                instR |= EncodeUImm6(shiftAmount) << 10;
+                instR |= (uint)shiftType << 22;
+
+                WriteInstructionRm16Auto(instR, rd, rn, rm);
+            }
+        }
+
+        private void WriteInstructionAuto(
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            Operand rm,
+            ArmExtensionType extensionType,
+            int shiftAmount = 0)
+        {
+            Debug.Assert((uint)shiftAmount <= 4);
+
+            instruction |= (uint)shiftAmount << 10;
+            instruction |= (uint)extensionType << 13;
+
+            WriteInstructionRm16Auto(instruction, rd, rn, rm);
+        }
+
+        private void WriteInstructionBitwiseAuto(
+            uint instI,
+            uint instR,
+            Operand rd,
+            Operand rn,
+            Operand rm,
+            ArmShiftType shiftType = ArmShiftType.Lsl,
+            int shiftAmount = 0)
+        {
+            if (rm.Kind == OperandKind.Constant && rm.Value != 0)
+            {
+                Debug.Assert(shiftAmount == 0);
+                bool canEncode = CodeGenCommon.TryEncodeBitMask(rm, out int immN, out int immS, out int immR);
+                Debug.Assert(canEncode);
+                uint instruction = instI | ((uint)immS << 10) | ((uint)immR << 16) | ((uint)immN << 22);
+
+                WriteInstructionAuto(instruction, rd, rn);
+            }
+            else
+            {
+                WriteInstructionBitwiseAuto(instR, rd, rn, rm, shiftType, shiftAmount);
+            }
+        }
+
+        private void WriteInstructionBitwiseAuto(
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            Operand rm,
+            ArmShiftType shiftType = ArmShiftType.Lsl,
+            int shiftAmount = 0)
+        {
+            if (rd.Type == OperandType.I64)
+            {
+                instruction |= SfFlag;
+            }
+
+            instruction |= EncodeUImm6(shiftAmount) << 10;
+            instruction |= (uint)shiftType << 22;
+
+            WriteInstructionRm16(instruction, rd, rn, rm);
+        }
+
+        private void WriteInstructionLdrStrAuto(
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            Operand rm,
+            ArmExtensionType extensionType,
+            bool shift)
+        {
+            if (shift)
+            {
+                instruction |= 1u << 12;
+            }
+
+            instruction |= (uint)extensionType << 13;
+
+            if (rd.Type == OperandType.I64)
+            {
+                instruction |= 1u << 30;
+            }
+
+            WriteInstructionRm16(instruction, rd, rn, rm);
+        }
+
+        private void WriteInstructionAuto(uint instruction, Operand rd)
+        {
+            if (rd.Type == OperandType.I64)
+            {
+                instruction |= SfFlag;
+            }
+
+            WriteInstruction(instruction, rd);
+        }
+
+        public void WriteInstructionAuto(uint instruction, Operand rd, Operand rn)
+        {
+            if (rd.Type == OperandType.I64)
+            {
+                instruction |= SfFlag;
+            }
+
+            WriteInstruction(instruction, rd, rn);
+        }
+
+        private void WriteInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+        {
+            if (rd.Type == OperandType.I64)
+            {
+                instruction |= SfFlag;
+            }
+
+            WriteInstruction(instruction, rd, rn, rm, ra);
+        }
+
+        public void WriteInstruction(uint instruction, Operand rd)
+        {
+            WriteUInt32(instruction | EncodeReg(rd));
+        }
+
+        public void WriteInstruction(uint instruction, Operand rd, Operand rn)
+        {
+            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+        }
+
+        public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm)
+        {
+            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 10));
+        }
+
+        public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+        {
+            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(ra) << 10) | (EncodeReg(rm) << 16));
+        }
+
+        private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn)
+        {
+            if (rd.Type == OperandType.FP64)
+            {
+                instruction |= 1u << 22;
+            }
+
+            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+        }
+
+        private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm)
+        {
+            if (rd.Type == OperandType.FP64)
+            {
+                instruction |= 1u << 22;
+            }
+
+            WriteInstructionRm16(instruction, rd, rn, rm);
+        }
+
+        private void WriteSimdInstruction(uint instruction, Operand rd, Operand rn, Operand rm, bool q = true)
+        {
+            if (q)
+            {
+                instruction |= 1u << 30;
+            }
+
+            WriteInstructionRm16(instruction, rd, rn, rm);
+        }
+
+        private void WriteInstructionRm16Auto(uint instruction, Operand rd, Operand rn, Operand rm)
+        {
+            if (rd.Type == OperandType.I64)
+            {
+                instruction |= SfFlag;
+            }
+
+            WriteInstructionRm16(instruction, rd, rn, rm);
+        }
+
+        public void WriteInstructionRm16(uint instruction, Operand rd, Operand rn, Operand rm)
+        {
+            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16));
+        }
+
+        public void WriteInstructionRm16NoRet(uint instruction, Operand rn, Operand rm)
+        {
+            WriteUInt32(instruction | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16));
+        }
+
+        private static uint GetLdpStpInstruction(uint intInst, uint vecInst, int imm, OperandType type)
+        {
+            uint instruction;
+            int scale;
+
+            if (type.IsInteger())
+            {
+                instruction = intInst;
+
+                if (type == OperandType.I64)
+                {
+                    instruction |= SfFlag;
+                    scale = 3;
+                }
+                else
+                {
+                    scale = 2;
+                }
+            }
+            else
+            {
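+                // For vector LDP/STP, opc selects the access size: 0 = 32-bit, 1 = 64-bit,
+                // 2 = 128-bit; the immediate scale is 2 + opc accordingly.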
+                int opc = type switch
+                {
+                    OperandType.FP32 => 0,
+                    OperandType.FP64 => 1,
+                    _ => 2
+                };
+
+                instruction = vecInst | ((uint)opc << 30);
+                scale = 2 + opc;
+            }
+
+            instruction |= (EncodeSImm7(imm, scale) << 15);
+
+            return instruction;
+        }
+
+        private static uint GetLdrStrInstruction(uint intInst, uint vecInst, OperandType type)
+        {
+            uint instruction;
+
+            if (type.IsInteger())
+            {
+                instruction = intInst;
+
+                if (type == OperandType.I64)
+                {
+                    instruction |= 1 << 30;
+                }
+            }
+            else
+            {
+                instruction = vecInst;
+
+                if (type == OperandType.V128)
+                {
+                    instruction |= 1u << 23;
+                }
+                else
+                {
+                    instruction |= type == OperandType.FP32 ? 2u << 30 : 3u << 30;
+                }
+            }
+
+            return instruction;
+        }
+
+        private static uint EncodeIndexSizeImm5(int index, int size)
+        {
+            Debug.Assert((uint)size < 4);
+            Debug.Assert((uint)index < (16u >> size), $"Invalid index {index} and size {size} combination.");
+            return ((uint)index << (size + 1)) | (1u << size);
+        }
+
+        private static uint EncodeSImm7(int value, int scale)
+        {
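+            // The assert shifts the field into the top bits and sign-extends it back;
+            // only values that fit in the scaled signed 7-bit field survive the round trip.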
+            uint imm = (uint)(value >> scale) & 0x7f;
+            Debug.Assert(((int)imm << 25) >> (25 - scale) == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+            return imm;
+        }
+
+        private static uint EncodeSImm9(int value)
+        {
+            uint imm = (uint)value & 0x1ff;
+            Debug.Assert(((int)imm << 23) >> 23 == value, $"Failed to encode constant 0x{value:X}.");
+            return imm;
+        }
+
+        private static uint EncodeSImm19_2(int value)
+        {
+            uint imm = (uint)(value >> 2) & 0x7ffff;
+            Debug.Assert(((int)imm << 13) >> 11 == value, $"Failed to encode constant 0x{value:X}.");
+            return imm;
+        }
+
+        private static uint EncodeSImm26_2(int value)
+        {
+            uint imm = (uint)(value >> 2) & 0x3ffffff;
+            Debug.Assert(((int)imm << 6) >> 4 == value, $"Failed to encode constant 0x{value:X}.");
+            return imm;
+        }
+
+        private static uint EncodeUImm4(int value)
+        {
+            uint imm = (uint)value & 0xf;
+            Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+            return imm;
+        }
+
+        private static uint EncodeUImm6(int value)
+        {
+            uint imm = (uint)value & 0x3f;
+            Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+            return imm;
+        }
+
+        private static uint EncodeUImm12(int value, OperandType type)
+        {
+            return EncodeUImm12(value, GetScaleForType(type));
+        }
+
+        private static uint EncodeUImm12(int value, int scale)
+        {
+            uint imm = (uint)(value >> scale) & 0xfff;
+            Debug.Assert((int)imm << scale == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+            return imm;
+        }
+
+        private static uint EncodeUImm16(int value)
+        {
+            uint imm = (uint)value & 0xffff;
+            Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+            return imm;
+        }
+
+        private static uint EncodeReg(Operand reg)
+        {
+            if (reg.Kind == OperandKind.Constant && reg.Value == 0)
+            {
+                return ZrRegister;
+            }
+
+            uint regIndex = (uint)reg.GetRegister().Index;
+            Debug.Assert(reg.Kind == OperandKind.Register);
+            Debug.Assert(regIndex < 32);
+            return regIndex;
+        }
+
+        public static int GetScaleForType(OperandType type)
+        {
+            return type switch
+            {
+                OperandType.I32 => 2,
+                OperandType.I64 => 3,
+                OperandType.FP32 => 2,
+                OperandType.FP64 => 3,
+                OperandType.V128 => 4,
+                _ => throw new ArgumentException($"Invalid type {type}.")
+            };
+        }
+
+        private void WriteInt16(short value)
+        {
+            WriteUInt16((ushort)value);
+        }
+
+        private void WriteInt32(int value)
+        {
+            WriteUInt32((uint)value);
+        }
+
+        private void WriteByte(byte value)
+        {
+            _stream.WriteByte(value);
+        }
+
+        private void WriteUInt16(ushort value)
+        {
+            _stream.WriteByte((byte)(value >> 0));
+            _stream.WriteByte((byte)(value >> 8));
+        }
+
+        private void WriteUInt32(uint value)
+        {
+            _stream.WriteByte((byte)(value >> 0));
+            _stream.WriteByte((byte)(value >> 8));
+            _stream.WriteByte((byte)(value >> 16));
+            _stream.WriteByte((byte)(value >> 24));
+        }
+    }
+}
diff --git a/ARMeilleure/CodeGen/Arm64/CallingConvention.cs b/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
new file mode 100644
index 0000000000..fda8d7867c
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
@@ -0,0 +1,96 @@
+using System;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    static class CallingConvention
+    {
+        private const int RegistersMask = unchecked((int)0xffffffff);
+
+        // Some of these registers have specific roles and can't be used as general purpose registers.
+        // X18 - Reserved for platform specific usage.
+        // X29 - Frame pointer.
+        // X30 - Return address.
+        // X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
+        private const int ReservedRegsMask = (1 << CodeGenCommon.ReservedRegister) | (1 << 18) | (1 << 29) | (1 << 30) | (1 << 31);
+
+        public static int GetIntAvailableRegisters()
+        {
+            return RegistersMask & ~ReservedRegsMask;
+        }
+
+        public static int GetVecAvailableRegisters()
+        {
+            return RegistersMask;
+        }
+
+        public static int GetIntCallerSavedRegisters()
+        {
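+            // Caller-saved registers are all registers that are neither callee-saved nor reserved.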
+            return (GetIntCalleeSavedRegisters() ^ RegistersMask) & ~ReservedRegsMask;
+        }
+
+        public static int GetFpCallerSavedRegisters()
+        {
+            return GetFpCalleeSavedRegisters() ^ RegistersMask;
+        }
+
+        public static int GetVecCallerSavedRegisters()
+        {
+            return GetVecCalleeSavedRegisters() ^ RegistersMask;
+        }
+
+        public static int GetIntCalleeSavedRegisters()
+        {
+            return 0x1ff80000; // X19 to X28
+        }
+
+        public static int GetFpCalleeSavedRegisters()
+        {
+            return 0xff00; // D8 to D15
+        }
+
+        public static int GetVecCalleeSavedRegisters()
+        {
+            return 0;
+        }
+
+        public static int GetArgumentsOnRegsCount()
+        {
+            return 8;
+        }
+
+        public static int GetIntArgumentRegister(int index)
+        {
+            if ((uint)index < (uint)GetArgumentsOnRegsCount())
+            {
+                return index;
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+
+        public static int GetVecArgumentRegister(int index)
+        {
+            if ((uint)index < (uint)GetArgumentsOnRegsCount())
+            {
+                return index;
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+
+        public static int GetIntReturnRegister()
+        {
+            return 0;
+        }
+
+        public static int GetIntReturnRegisterHigh()
+        {
+            return 1;
+        }
+
+        public static int GetVecReturnRegister()
+        {
+            return 0;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs b/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
new file mode 100644
index 0000000000..e67d2fdb7f
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
@@ -0,0 +1,173 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Numerics;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    static class CodeGenCommon
+    {
+        public const int TcAddressRegister = 8;
+        public const int ReservedRegister = 17;
+
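+        // The ConstFitsOn* checks below round-trip the value through the encoded field width
+        // (with sign extension for the signed variants); the constant fits if the round trip
+        // preserves it.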
+        public static bool ConstFitsOnSImm7(int value, int scale)
+        {
+            return (((value >> scale) << 25) >> (25 - scale)) == value;
+        }
+
+        public static bool ConstFitsOnSImm9(int value)
+        {
+            return ((value << 23) >> 23) == value;
+        }
+
+        public static bool ConstFitsOnUImm12(int value)
+        {
+            return (value & 0xfff) == value;
+        }
+
+        public static bool ConstFitsOnUImm12(int value, OperandType type)
+        {
+            int scale = Assembler.GetScaleForType(type);
+            return (((value >> scale) & 0xfff) << scale) == value;
+        }
+
+        public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
+        {
+            ulong value = operand.Value;
+
+            if (operand.Type == OperandType.I32)
+            {
+                value |= value << 32;
+            }
+
+            return TryEncodeBitMask(value, out immN, out immS, out immR);
+        }
+
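+        // Tries to encode the value as an ARM64 logical immediate (N:immr:imms): a 2, 4, 8,
+        // 16, 32 or 64-bit element consisting of a rotated run of contiguous ones, repeated
+        // to fill 64 bits.
+        // Worked example: 0x00FF00FF00FF00FF is a 16-bit element with 8 contiguous ones and
+        // no rotation, repeated 4 times, so it encodes as immN = 0, immR = 0 and
+        // immS = 0b100111 (high bits 10 select a 16-bit element, low bits 0b0111 = ones - 1).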
+        public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR)
+        {
+            // Some special values also can't be encoded:
+            // 0 can't be encoded because we need to subtract 1 from onesCount (which would become negative for 0).
+            // A value with all bits set can't be encoded because it is reserved by the spec:
+            // Any value ANDed with all ones equals itself, so it's effectively a no-op.
+            // Any value ORed with all ones equals all ones, so one can just use MOV.
+            // Any value XORed with all ones equals its inverse, so one can just use MVN.
+            if (value == ulong.MaxValue)
+            {
+                immN = 0;
+                immS = 0;
+                immR = 0;
+
+                return false;
+            }
+
+            int bitLength = CountSequence(value);
+
+            if ((value >> bitLength) != 0)
+            {
+                bitLength += CountSequence(value >> bitLength);
+            }
+
+            int bitLengthLog2 = BitOperations.Log2((uint)bitLength);
+            int bitLengthPow2 = 1 << bitLengthLog2;
+
+            if (bitLengthPow2 < bitLength)
+            {
+                bitLengthLog2++;
+                bitLengthPow2 <<= 1;
+            }
+
+            int selectedESize = 64;
+            int repetitions = 1;
+            int onesCount = BitOperations.PopCount(value);
+
+            if (bitLengthPow2 < 64 && (value >> bitLengthPow2) != 0)
+            {
+                for (int eSizeLog2 = bitLengthLog2; eSizeLog2 < 6; eSizeLog2++)
+                {
+                    bool match = true;
+                    int eSize = 1 << eSizeLog2;
+                    ulong mask = (1UL << eSize) - 1;
+                    ulong eValue = value & mask;
+
+                    for (int e = 1; e < 64 / eSize; e++)
+                    {
+                        if (((value >> (e * eSize)) & mask) != eValue)
+                        {
+                            match = false;
+                            break;
+                        }
+                    }
+
+                    if (match)
+                    {
+                        selectedESize = eSize;
+                        repetitions = 64 / eSize;
+                        onesCount = BitOperations.PopCount(eValue);
+                        break;
+                    }
+                }
+            }
+
+            // Find the rotation. There are two cases: one where the highest bit of the element
+            // is 0, and one where it is 1.
+            // If it's 1, we count the consecutive 1 bits at the MSB end to find the right rotation.
+            // If it's 0, we count the consecutive 0 bits at the LSB end to find the left rotation,
+            // then convert it to a right rotation by subtracting it from the element size.
+            int rotation;
+            long vHigh = (long)(value << (64 - selectedESize));
+            if (vHigh < 0)
+            {
+                rotation = BitOperations.LeadingZeroCount(~(ulong)vHigh);
+            }
+            else
+            {
+                rotation = (selectedESize - BitOperations.TrailingZeroCount(value)) & (selectedESize - 1);
+            }
+
+            // Reconstruct value and see if it matches. If not, we can't encode.
+            ulong reconstructed = onesCount == 64 ? ulong.MaxValue : RotateRight((1UL << onesCount) - 1, rotation, selectedESize);
+
+            for (int bit = 32; bit >= selectedESize; bit >>= 1)
+            {
+                reconstructed |= reconstructed << bit;
+            }
+
+            if (reconstructed != value || onesCount == 0)
+            {
+                immN = 0;
+                immS = 0;
+                immR = 0;
+
+                return false;
+            }
+
+            immR = rotation;
+
+            // immN set indicates a full 64-bit element (no repetitions).
+            // Otherwise, the high bits of immS encode the element size (and with it the number
+            // of repetitions), while the low bits encode the number of ones minus 1.
+            if (repetitions == 1)
+            {
+                immN = 1;
+                immS = 0;
+            }
+            else
+            {
+                immN = 0;
+                immS = (0xf80 >> BitOperations.Log2((uint)repetitions)) & 0x3f;
+            }
+
+            immS |= onesCount - 1;
+
+            return true;
+        }
+
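+        // Length of the lowest run of identical bits: trailing zeros plus trailing ones
+        // (at most one of the two terms is non-zero).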
+        private static int CountSequence(ulong value)
+        {
+            return BitOperations.TrailingZeroCount(value) + BitOperations.TrailingZeroCount(~value);
+        }
+
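+        // Rotates right within an element of 'size' bits, where size may be smaller than 64.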
+        private static ulong RotateRight(ulong bits, int shift, int size)
+        {
+            return (bits >> shift) | ((bits << (size - shift)) & (size == 64 ? ulong.MaxValue : (1UL << size) - 1));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs b/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
new file mode 100644
index 0000000000..1ddde0c196
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
@@ -0,0 +1,286 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    class CodeGenContext
+    {
+        private const int BccInstLength = 4;
+        private const int CbnzInstLength = 4;
+        private const int LdrLitInstLength = 4;
+
+        private Stream _stream;
+
+        public int StreamOffset => (int)_stream.Length;
+
+        public AllocationResult AllocResult { get; }
+
+        public Assembler Assembler { get; }
+
+        public BasicBlock CurrBlock { get; private set; }
+
+        public bool HasCall { get; }
+
+        public int CallArgsRegionSize { get; }
+        public int FpLrSaveRegionSize { get; }
+
+        private readonly Dictionary<BasicBlock, long> _visitedBlocks;
+        private readonly Dictionary<BasicBlock, List<(ArmCondition Condition, long BranchPos)>> _pendingBranches;
+
+        private struct ConstantPoolEntry
+        {
+            public readonly int Offset;
+            public readonly Symbol Symbol;
+            public readonly List<(Operand, int)> LdrOffsets;
+
+            public ConstantPoolEntry(int offset, Symbol symbol)
+            {
+                Offset = offset;
+                Symbol = symbol;
+                LdrOffsets = new List<(Operand, int)>();
+            }
+        }
+
+        private readonly Dictionary<ulong, ConstantPoolEntry> _constantPool;
+
+        private bool _constantPoolWritten;
+        private long _constantPoolOffset;
+
+        private ArmCondition _jNearCondition;
+        private Operand _jNearValue;
+
+        private long _jNearPosition;
+
+        private readonly bool _relocatable;
+
+        public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
+        {
+            _stream = new MemoryStream();
+
+            AllocResult = allocResult;
+
+            Assembler = new Assembler(_stream);
+
+            bool hasCall = maxCallArgs >= 0;
+
+            HasCall = hasCall;
+
+            if (maxCallArgs < 0)
+            {
+                maxCallArgs = 0;
+            }
+
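+            // 16 bytes per outgoing argument slot, presumably so V128 arguments also fit.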
+            CallArgsRegionSize = maxCallArgs * 16;
+            FpLrSaveRegionSize = hasCall ? 16 : 0;
+
+            _visitedBlocks = new Dictionary<BasicBlock, long>();
+            _pendingBranches = new Dictionary<BasicBlock, List<(ArmCondition, long)>>();
+            _constantPool = new Dictionary<ulong, ConstantPoolEntry>();
+
+            _relocatable = relocatable;
+        }
+
+        public void EnterBlock(BasicBlock block)
+        {
+            CurrBlock = block;
+
+            long target = _stream.Position;
+
+            if (_pendingBranches.TryGetValue(block, out var list))
+            {
+                foreach (var tuple in list)
+                {
+                    _stream.Seek(tuple.BranchPos, SeekOrigin.Begin);
+                    WriteBranch(tuple.Condition, target);
+                }
+
+                _stream.Seek(target, SeekOrigin.Begin);
+                _pendingBranches.Remove(block);
+            }
+
+            _visitedBlocks.Add(block, target);
+        }
+
+        public void JumpTo(BasicBlock target)
+        {
+            JumpTo(ArmCondition.Al, target);
+        }
+
+        public void JumpTo(ArmCondition condition, BasicBlock target)
+        {
+            if (_visitedBlocks.TryGetValue(target, out long offset))
+            {
+                WriteBranch(condition, offset);
+            }
+            else
+            {
+                if (!_pendingBranches.TryGetValue(target, out var list))
+                {
+                    list = new List<(ArmCondition, long)>();
+                    _pendingBranches.Add(target, list);
+                }
+
+                list.Add((condition, _stream.Position));
+
+                _stream.Seek(BccInstLength, SeekOrigin.Current);
+            }
+        }
+
+        private void WriteBranch(ArmCondition condition, long to)
+        {
+            int imm = checked((int)(to - _stream.Position));
+
+            if (condition != ArmCondition.Al)
+            {
+                Assembler.B(condition, imm);
+            }
+            else
+            {
+                Assembler.B(imm);
+            }
+        }
+
+        public void JumpToNear(ArmCondition condition)
+        {
+            _jNearCondition = condition;
+            _jNearPosition = _stream.Position;
+
+            _stream.Seek(BccInstLength, SeekOrigin.Current);
+        }
+
+        public void JumpToNearIfNotZero(Operand value)
+        {
+            _jNearValue = value;
+            _jNearPosition = _stream.Position;
+
+            _stream.Seek(CbnzInstLength, SeekOrigin.Current);
+        }
+
+        public void JumpHere()
+        {
+            long currentPosition = _stream.Position;
+            long offset = currentPosition - _jNearPosition;
+
+            _stream.Seek(_jNearPosition, SeekOrigin.Begin);
+
+            if (_jNearValue != default)
+            {
+                Assembler.Cbnz(_jNearValue, checked((int)offset));
+                _jNearValue = default;
+            }
+            else
+            {
+                Assembler.B(_jNearCondition, checked((int)offset));
+            }
+
+            _stream.Seek(currentPosition, SeekOrigin.Begin);
+        }
+
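+        // Deduplicates the constant in the pool and records the position of an LDR (literal)
+        // placeholder; the actual instruction is emitted when the pool is written.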
+        public void ReserveRelocatableConstant(Operand rt, Symbol symbol, ulong value)
+        {
+            if (!_constantPool.TryGetValue(value, out ConstantPoolEntry cpe))
+            {
+                cpe = new ConstantPoolEntry(_constantPool.Count * sizeof(ulong), symbol);
+                _constantPool.Add(value, cpe);
+            }
+
+            cpe.LdrOffsets.Add((rt, (int)_stream.Position));
+            _stream.Seek(LdrLitInstLength, SeekOrigin.Current);
+        }
+
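+        // Appends the pooled 64-bit constants at the end of the code stream, then goes back
+        // and emits each reserved LDR (literal) with its PC-relative offset into the pool.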
+        private long WriteConstantPool()
+        {
+            if (_constantPoolWritten)
+            {
+                return _constantPoolOffset;
+            }
+
+            long constantPoolBaseOffset = _stream.Position;
+
+            foreach (ulong value in _constantPool.Keys)
+            {
+                WriteUInt64(value);
+            }
+
+            foreach (ConstantPoolEntry cpe in _constantPool.Values)
+            {
+                foreach ((Operand rt, int ldrOffset) in cpe.LdrOffsets)
+                {
+                    _stream.Seek(ldrOffset, SeekOrigin.Begin);
+
+                    int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
+                    int pcRelativeOffset = absoluteOffset - ldrOffset;
+
+                    Assembler.LdrLit(rt, pcRelativeOffset);
+                }
+            }
+
+            _stream.Seek(constantPoolBaseOffset + _constantPool.Count * sizeof(ulong), SeekOrigin.Begin);
+
+            _constantPoolOffset = constantPoolBaseOffset;
+            _constantPoolWritten = true;
+
+            return constantPoolBaseOffset;
+        }
+
+        public (byte[], RelocInfo) GetCode()
+        {
+            long constantPoolBaseOffset = WriteConstantPool();
+
+            byte[] code = new byte[_stream.Length];
+
+            long originalPosition = _stream.Position;
+
+            _stream.Seek(0, SeekOrigin.Begin);
+            _stream.Read(code, 0, code.Length);
+            _stream.Seek(originalPosition, SeekOrigin.Begin);
+
+            RelocInfo relocInfo;
+
+            if (_relocatable)
+            {
+                RelocEntry[] relocs = new RelocEntry[_constantPool.Count];
+
+                int index = 0;
+
+                foreach (ConstantPoolEntry cpe in _constantPool.Values)
+                {
+                    if (cpe.Symbol.Type != SymbolType.None)
+                    {
+                        int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
+                        relocs[index++] = new RelocEntry(absoluteOffset, cpe.Symbol);
+                    }
+                }
+
+                if (index != relocs.Length)
+                {
+                    Array.Resize(ref relocs, index);
+                }
+
+                relocInfo = new RelocInfo(relocs);
+            }
+            else
+            {
+                relocInfo = new RelocInfo(Array.Empty<RelocEntry>());
+            }
+
+            return (code, relocInfo);
+        }
+
+        private void WriteUInt64(ulong value)
+        {
+            _stream.WriteByte((byte)(value >> 0));
+            _stream.WriteByte((byte)(value >> 8));
+            _stream.WriteByte((byte)(value >> 16));
+            _stream.WriteByte((byte)(value >> 24));
+            _stream.WriteByte((byte)(value >> 32));
+            _stream.WriteByte((byte)(value >> 40));
+            _stream.WriteByte((byte)(value >> 48));
+            _stream.WriteByte((byte)(value >> 56));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs b/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
new file mode 100644
index 0000000000..704aa45acd
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
@@ -0,0 +1,1576 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+
+using static ARMeilleure.IntermediateRepresentation.Operand;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    static class CodeGenerator
+    {
+        private const int DWordScale = 3;
+
+        private const int RegistersCount = 32;
+
+        private const int FpRegister = 29;
+        private const int LrRegister = 30;
+        private const int SpRegister = 31;
+        private const int ZrRegister = 31;
+
+        private enum AccessSize
+        {
+            Byte,
+            Hword,
+            Auto
+        }
+
+        private static readonly Action<CodeGenContext, Operation>[] _instTable;
+
+        static CodeGenerator()
+        {
+            _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];
+
+            Add(Instruction.Add,                     GenerateAdd);
+            Add(Instruction.BitwiseAnd,              GenerateBitwiseAnd);
+            Add(Instruction.BitwiseExclusiveOr,      GenerateBitwiseExclusiveOr);
+            Add(Instruction.BitwiseNot,              GenerateBitwiseNot);
+            Add(Instruction.BitwiseOr,               GenerateBitwiseOr);
+            Add(Instruction.BranchIf,                GenerateBranchIf);
+            Add(Instruction.ByteSwap,                GenerateByteSwap);
+            Add(Instruction.Call,                    GenerateCall);
+            //Add(Instruction.Clobber,                 GenerateClobber);
+            Add(Instruction.Compare,                 GenerateCompare);
+            Add(Instruction.CompareAndSwap,          GenerateCompareAndSwap);
+            Add(Instruction.CompareAndSwap16,        GenerateCompareAndSwap16);
+            Add(Instruction.CompareAndSwap8,         GenerateCompareAndSwap8);
+            Add(Instruction.ConditionalSelect,       GenerateConditionalSelect);
+            Add(Instruction.ConvertI64ToI32,         GenerateConvertI64ToI32);
+            Add(Instruction.ConvertToFP,             GenerateConvertToFP);
+            Add(Instruction.ConvertToFPUI,           GenerateConvertToFPUI);
+            Add(Instruction.Copy,                    GenerateCopy);
+            Add(Instruction.CountLeadingZeros,       GenerateCountLeadingZeros);
+            Add(Instruction.Divide,                  GenerateDivide);
+            Add(Instruction.DivideUI,                GenerateDivideUI);
+            Add(Instruction.Fill,                    GenerateFill);
+            Add(Instruction.Load,                    GenerateLoad);
+            Add(Instruction.Load16,                  GenerateLoad16);
+            Add(Instruction.Load8,                   GenerateLoad8);
+            Add(Instruction.MemoryBarrier,           GenerateMemoryBarrier);
+            Add(Instruction.Multiply,                GenerateMultiply);
+            Add(Instruction.Multiply64HighSI,        GenerateMultiply64HighSI);
+            Add(Instruction.Multiply64HighUI,        GenerateMultiply64HighUI);
+            Add(Instruction.Negate,                  GenerateNegate);
+            Add(Instruction.Return,                  GenerateReturn);
+            Add(Instruction.RotateRight,             GenerateRotateRight);
+            Add(Instruction.ShiftLeft,               GenerateShiftLeft);
+            Add(Instruction.ShiftRightSI,            GenerateShiftRightSI);
+            Add(Instruction.ShiftRightUI,            GenerateShiftRightUI);
+            Add(Instruction.SignExtend16,            GenerateSignExtend16);
+            Add(Instruction.SignExtend32,            GenerateSignExtend32);
+            Add(Instruction.SignExtend8,             GenerateSignExtend8);
+            Add(Instruction.Spill,                   GenerateSpill);
+            Add(Instruction.SpillArg,                GenerateSpillArg);
+            Add(Instruction.StackAlloc,              GenerateStackAlloc);
+            Add(Instruction.Store,                   GenerateStore);
+            Add(Instruction.Store16,                 GenerateStore16);
+            Add(Instruction.Store8,                  GenerateStore8);
+            Add(Instruction.Subtract,                GenerateSubtract);
+            Add(Instruction.Tailcall,                GenerateTailcall);
+            Add(Instruction.VectorCreateScalar,      GenerateVectorCreateScalar);
+            Add(Instruction.VectorExtract,           GenerateVectorExtract);
+            Add(Instruction.VectorExtract16,         GenerateVectorExtract16);
+            Add(Instruction.VectorExtract8,          GenerateVectorExtract8);
+            Add(Instruction.VectorInsert,            GenerateVectorInsert);
+            Add(Instruction.VectorInsert16,          GenerateVectorInsert16);
+            Add(Instruction.VectorInsert8,           GenerateVectorInsert8);
+            Add(Instruction.VectorOne,               GenerateVectorOne);
+            Add(Instruction.VectorZero,              GenerateVectorZero);
+            Add(Instruction.VectorZeroUpper64,       GenerateVectorZeroUpper64);
+            Add(Instruction.VectorZeroUpper96,       GenerateVectorZeroUpper96);
+            Add(Instruction.ZeroExtend16,            GenerateZeroExtend16);
+            Add(Instruction.ZeroExtend32,            GenerateZeroExtend32);
+            Add(Instruction.ZeroExtend8,             GenerateZeroExtend8);
+
+            static void Add(Instruction inst, Action<CodeGenContext, Operation> func)
+            {
+                _instTable[(int)inst] = func;
+            }
+        }
+
+        public static CompiledFunction Generate(CompilerContext cctx)
+        {
+            ControlFlowGraph cfg = cctx.Cfg;
+
+            Logger.StartPass(PassName.Optimization);
+
+            if (cctx.Options.HasFlag(CompilerOptions.Optimize))
+            {
+                if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+                {
+                    Optimizer.RunPass(cfg);
+                }
+
+                BlockPlacement.RunPass(cfg);
+            }
+
+            Arm64Optimizer.RunPass(cfg);
+
+            Logger.EndPass(PassName.Optimization, cfg);
+
+            Logger.StartPass(PassName.PreAllocation);
+
+            StackAllocator stackAlloc = new();
+
+            PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs);
+
+            Logger.EndPass(PassName.PreAllocation, cfg);
+
+            Logger.StartPass(PassName.RegisterAllocation);
+
+            if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+            {
+                Ssa.Deconstruct(cfg);
+            }
+
+            IRegisterAllocator regAlloc;
+
+            if (cctx.Options.HasFlag(CompilerOptions.Lsra))
+            {
+                regAlloc = new LinearScanAllocator();
+            }
+            else
+            {
+                regAlloc = new HybridAllocator();
+            }
+
+            RegisterMasks regMasks = new(
+                CallingConvention.GetIntAvailableRegisters(),
+                CallingConvention.GetVecAvailableRegisters(),
+                CallingConvention.GetIntCallerSavedRegisters(),
+                CallingConvention.GetVecCallerSavedRegisters(),
+                CallingConvention.GetIntCalleeSavedRegisters(),
+                CallingConvention.GetVecCalleeSavedRegisters(),
+                RegistersCount);
+
+            AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
+
+            Logger.EndPass(PassName.RegisterAllocation, cfg);
+
+            Logger.StartPass(PassName.CodeGeneration);
+
+            bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
+
+            CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);
+
+            UnwindInfo unwindInfo = WritePrologue(context);
+
+            for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+            {
+                context.EnterBlock(block);
+
+                for (Operation node = block.Operations.First; node != default;)
+                {
+                    node = GenerateOperation(context, node);
+                }
+
+                if (block.SuccessorsCount == 0)
+                {
+                    // The only blocks which can have 0 successors are exit blocks.
+                    Operation last = block.Operations.Last;
+
+                    Debug.Assert(last.Instruction == Instruction.Tailcall ||
+                                 last.Instruction == Instruction.Return);
+                }
+                else
+                {
+                    BasicBlock succ = block.GetSuccessor(0);
+
+                    if (succ != block.ListNext)
+                    {
+                        context.JumpTo(succ);
+                    }
+                }
+            }
+
+            (byte[] code, RelocInfo relocInfo) = context.GetCode();
+
+            Logger.EndPass(PassName.CodeGeneration);
+
+            return new CompiledFunction(code, unwindInfo, relocInfo);
+        }
+
+        private static Operation GenerateOperation(CodeGenContext context, Operation operation)
+        {
+            if (operation.Instruction == Instruction.Extended)
+            {
+                CodeGeneratorIntrinsic.GenerateOperation(context, operation);
+            }
+            else
+            {
+                if (IsLoadOrStore(operation) &&
+                    operation.ListNext != default &&
+                    operation.ListNext.Instruction == operation.Instruction &&
+                    TryPairMemoryOp(context, operation, operation.ListNext))
+                {
+                    // Skip next operation if we managed to pair them.
+                    return operation.ListNext.ListNext;
+                }
+
+                Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction];
+
+                if (func != null)
+                {
+                    func(context, operation);
+                }
+                else
+                {
+                    throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+                }
+            }
+
+            return operation.ListNext;
+        }
+
+        private static void GenerateAdd(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            // ValidateBinOp(dest, src1, src2);
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Add(dest, src1, src2);
+            }
+            else
+            {
+                context.Assembler.FaddScalar(dest, src1, src2);
+            }
+        }
+
+        private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateBinOp(dest, src1, src2);
+
+            Debug.Assert(dest.Type.IsInteger());
+
+            context.Assembler.And(dest, src1, src2);
+        }
+
+        private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateBinOp(dest, src1, src2);
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Eor(dest, src1, src2);
+            }
+            else
+            {
+                context.Assembler.EorVector(dest, src1, src2);
+            }
+        }
+
+        private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            ValidateUnOp(dest, source);
+
+            Debug.Assert(dest.Type.IsInteger());
+
+            context.Assembler.Mvn(dest, source);
+        }
+
+        private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateBinOp(dest, src1, src2);
+
+            Debug.Assert(dest.Type.IsInteger());
+
+            context.Assembler.Orr(dest, src1, src2);
+        }
+
+        private static void GenerateBranchIf(CodeGenContext context, Operation operation)
+        {
+            Operand comp = operation.GetSource(2);
+
+            Debug.Assert(comp.Kind == OperandKind.Constant);
+
+            var cond = ((Comparison)comp.AsInt32()).ToArmCondition();
+
+            GenerateCompareCommon(context, operation);
+
+            context.JumpTo(cond, context.CurrBlock.GetSuccessor(1));
+        }
+
+        private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            ValidateUnOp(dest, source);
+
+            Debug.Assert(dest.Type.IsInteger());
+
+            context.Assembler.Rev(dest, source);
+        }
+
+        private static void GenerateCall(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Blr(operation.GetSource(0));
+        }
+
+        private static void GenerateCompare(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand comp = operation.GetSource(2);
+
+            Debug.Assert(dest.Type == OperandType.I32);
+            Debug.Assert(comp.Kind == OperandKind.Constant);
+
+            var cond = ((Comparison)comp.AsInt32()).ToArmCondition();
+
+            GenerateCompareCommon(context, operation);
+
+            context.Assembler.Cset(dest, cond);
+        }
+
+        private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation)
+        {
+            if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3.
+            {
+                Operand actualLow    = operation.GetDestination(0);
+                Operand actualHigh   = operation.GetDestination(1);
+                Operand temp0        = operation.GetDestination(2);
+                Operand temp1        = operation.GetDestination(3);
+                Operand address      = operation.GetSource(0);
+                Operand expectedLow  = operation.GetSource(1);
+                Operand expectedHigh = operation.GetSource(2);
+                Operand desiredLow   = operation.GetSource(3);
+                Operand desiredHigh  = operation.GetSource(4);
+
+                GenerateAtomicDcas(
+                    context,
+                    address,
+                    expectedLow,
+                    expectedHigh,
+                    desiredLow,
+                    desiredHigh,
+                    actualLow,
+                    actualHigh,
+                    temp0,
+                    temp1);
+            }
+            else
+            {
+                Operand actual   = operation.GetDestination(0);
+                Operand result   = operation.GetDestination(1);
+                Operand address  = operation.GetSource(0);
+                Operand expected = operation.GetSource(1);
+                Operand desired  = operation.GetSource(2);
+
+                GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Auto);
+            }
+        }
+
+        private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation)
+        {
+            Operand actual   = operation.GetDestination(0);
+            Operand result   = operation.GetDestination(1);
+            Operand address  = operation.GetSource(0);
+            Operand expected = operation.GetSource(1);
+            Operand desired  = operation.GetSource(2);
+
+            GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Hword);
+        }
+
+        private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation)
+        {
+            Operand actual   = operation.GetDestination(0);
+            Operand result   = operation.GetDestination(1);
+            Operand address  = operation.GetSource(0);
+            Operand expected = operation.GetSource(1);
+            Operand desired  = operation.GetSource(2);
+
+            GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Byte);
+        }
+
+        private static void GenerateCompareCommon(CodeGenContext context, Operation operation)
+        {
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            EnsureSameType(src1, src2);
+
+            Debug.Assert(src1.Type.IsInteger());
+
+            context.Assembler.Cmp(src1, src2);
+        }
+
+        private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+            Operand src3 = operation.GetSource(2);
+
+            EnsureSameType(dest, src2, src3);
+
+            Debug.Assert(dest.Type.IsInteger());
+            Debug.Assert(src1.Type == OperandType.I32);
+
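+            // dest = (src1 != 0) ? src2 : src3.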
+            context.Assembler.Cmp (src1, Const(src1.Type, 0));
+            context.Assembler.Csel(dest, src2, src3, ArmCondition.Ne);
+        }
+
+        private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64);
+
+            context.Assembler.Mov(dest, Register(source, OperandType.I32));
+        }
+
+        private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64);
+            Debug.Assert(dest.Type != source.Type);
+            Debug.Assert(source.Type != OperandType.V128);
+
+            if (source.Type.IsInteger())
+            {
+                context.Assembler.ScvtfScalar(dest, source);
+            }
+            else
+            {
+                context.Assembler.FcvtScalar(dest, source);
+            }
+        }
+
+        private static void GenerateConvertToFPUI(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64);
+            Debug.Assert(dest.Type != source.Type);
+            Debug.Assert(source.Type.IsInteger());
+
+            context.Assembler.UcvtfScalar(dest, source);
+        }
+
+        private static void GenerateCopy(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            EnsureSameType(dest, source);
+
+            Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant);
+
+            // Moves to the same register are useless.
+            if (dest.Kind == source.Kind && dest.Value == source.Value)
+            {
+                return;
+            }
+
+            if (dest.Kind == OperandKind.Register && source.Kind == OperandKind.Constant)
+            {
+                if (source.Relocatable)
+                {
+                    context.ReserveRelocatableConstant(dest, source.Symbol, source.Value);
+                }
+                else
+                {
+                    GenerateConstantCopy(context, dest, source.Value);
+                }
+            }
+            else
+            {
+                context.Assembler.Mov(dest, source);
+            }
+        }
+
+        private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            EnsureSameType(dest, source);
+
+            Debug.Assert(dest.Type.IsInteger());
+
+            context.Assembler.Clz(dest, source);
+        }
+
+        private static void GenerateDivide(CodeGenContext context, Operation operation)
+        {
+            Operand dest     = operation.Destination;
+            Operand dividend = operation.GetSource(0);
+            Operand divisor  = operation.GetSource(1);
+
+            ValidateBinOp(dest, dividend, divisor);
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Sdiv(dest, dividend, divisor);
+            }
+            else
+            {
+                context.Assembler.FdivScalar(dest, dividend, divisor);
+            }
+        }
+
+        private static void GenerateDivideUI(CodeGenContext context, Operation operation)
+        {
+            Operand dest     = operation.Destination;
+            Operand dividend = operation.GetSource(0);
+            Operand divisor  = operation.GetSource(1);
+
+            ValidateBinOp(dest, dividend, divisor);
+
+            context.Assembler.Udiv(dest, dividend, divisor);
+        }
+
+        private static void GenerateLoad(CodeGenContext context, Operation operation)
+        {
+            Operand value   = operation.Destination;
+            Operand address = operation.GetSource(0);
+
+            context.Assembler.Ldr(value, address);
+        }
+
+        private static void GenerateLoad16(CodeGenContext context, Operation operation)
+        {
+            Operand value   = operation.Destination;
+            Operand address = operation.GetSource(0);
+
+            Debug.Assert(value.Type.IsInteger());
+
+            context.Assembler.LdrhRiUn(value, address, 0);
+        }
+
+        private static void GenerateLoad8(CodeGenContext context, Operation operation)
+        {
+            Operand value   = operation.Destination;
+            Operand address = operation.GetSource(0);
+
+            Debug.Assert(value.Type.IsInteger());
+
+            context.Assembler.LdrbRiUn(value, address, 0);
+        }
+
+        private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation)
+        {
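+            // 0xf = SY: full-system barrier affecting both reads and writes.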
+            context.Assembler.Dmb(0xf);
+        }
+
+        private static void GenerateMultiply(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            EnsureSameType(dest, src1, src2);
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Mul(dest, src1, src2);
+            }
+            else
+            {
+                context.Assembler.FmulScalar(dest, src1, src2);
+            }
+        }
+
+        private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            EnsureSameType(dest, src1, src2);
+
+            Debug.Assert(dest.Type == OperandType.I64);
+
+            context.Assembler.Smulh(dest, src1, src2);
+        }
+
+        private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            EnsureSameType(dest, src1, src2);
+
+            Debug.Assert(dest.Type == OperandType.I64);
+
+            context.Assembler.Umulh(dest, src1, src2);
+        }
+
+        private static void GenerateNegate(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            ValidateUnOp(dest, source);
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Neg(dest, source);
+            }
+            else
+            {
+                context.Assembler.FnegScalar(dest, source);
+            }
+        }
+
+        private static void GenerateLoad(CodeGenContext context, Operand value, Operand address, int offset)
+        {
+            if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
+            {
+                context.Assembler.LdrRiUn(value, address, offset);
+            }
+            else if (CodeGenCommon.ConstFitsOnSImm9(offset))
+            {
+                context.Assembler.Ldur(value, address, offset);
+            }
+            else
+            {
+                Operand tempAddress = Register(CodeGenCommon.ReservedRegister);
+                GenerateConstantCopy(context, tempAddress, (ulong)offset);
+                context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input.
+                context.Assembler.LdrRiUn(value, tempAddress, 0);
+            }
+        }
+
+        private static void GenerateReturn(CodeGenContext context, Operation operation)
+        {
+            WriteEpilogue(context);
+
+            context.Assembler.Ret(Register(LrRegister));
+        }
+
+        private static void GenerateRotateRight(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateShift(dest, src1, src2);
+
+            context.Assembler.Ror(dest, src1, src2);
+        }
+
+        private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateShift(dest, src1, src2);
+
+            context.Assembler.Lsl(dest, src1, src2);
+        }
+
+        private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateShift(dest, src1, src2);
+
+            context.Assembler.Asr(dest, src1, src2);
+        }
+
+        private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateShift(dest, src1, src2);
+
+            context.Assembler.Lsr(dest, src1, src2);
+        }
+
+        private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+            context.Assembler.Sxth(dest, source);
+        }
+
+        private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+            context.Assembler.Sxtw(dest, source);
+        }
+
+        private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+            context.Assembler.Sxtb(dest, source);
+        }
+
+        private static void GenerateFill(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand offset = operation.GetSource(0);
+
+            Debug.Assert(offset.Kind == OperandKind.Constant);
+
+            int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;
+
+            GenerateLoad(context, dest, Register(SpRegister), offs);
+        }
+
+        private static void GenerateStore(CodeGenContext context, Operand value, Operand address, int offset)
+        {
+            if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
+            {
+                context.Assembler.StrRiUn(value, address, offset);
+            }
+            else if (CodeGenCommon.ConstFitsOnSImm9(offset))
+            {
+                context.Assembler.Stur(value, address, offset);
+            }
+            else
+            {
+                Operand tempAddress = Register(CodeGenCommon.ReservedRegister);
+                GenerateConstantCopy(context, tempAddress, (ulong)offset);
+                context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input.
+                context.Assembler.StrRiUn(value, tempAddress, 0);
+            }
+        }
+
+        private static void GenerateSpill(CodeGenContext context, Operation operation)
+        {
+            GenerateSpill(context, operation, context.CallArgsRegionSize + context.FpLrSaveRegionSize);
+        }
+
+        private static void GenerateSpillArg(CodeGenContext context, Operation operation)
+        {
+            GenerateSpill(context, operation, 0);
+        }
+
+        private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand offset = operation.GetSource(0);
+
+            Debug.Assert(offset.Kind == OperandKind.Constant);
+
+            int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;
+
+            context.Assembler.Add(dest, Register(SpRegister), Const(dest.Type, offs));
+        }
+
+        private static void GenerateStore(CodeGenContext context, Operation operation)
+        {
+            Operand value   = operation.GetSource(1);
+            Operand address = operation.GetSource(0);
+
+            context.Assembler.Str(value, address);
+        }
+
+        private static void GenerateStore16(CodeGenContext context, Operation operation)
+        {
+            Operand value   = operation.GetSource(1);
+            Operand address = operation.GetSource(0);
+
+            Debug.Assert(value.Type.IsInteger());
+
+            context.Assembler.StrhRiUn(value, address, 0);
+        }
+
+        private static void GenerateStore8(CodeGenContext context, Operation operation)
+        {
+            Operand value   = operation.GetSource(1);
+            Operand address = operation.GetSource(0);
+
+            Debug.Assert(value.Type.IsInteger());
+
+            context.Assembler.StrbRiUn(value, address, 0);
+        }
+
+        private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
+        {
+            Operand offset = operation.GetSource(0);
+            Operand source = operation.GetSource(1);
+
+            Debug.Assert(offset.Kind == OperandKind.Constant);
+
+            int offs = offset.AsInt32() + baseOffset;
+
+            GenerateStore(context, source, Register(SpRegister), offs);
+        }
+
+        private static void GenerateSubtract(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            // ValidateBinOp(dest, src1, src2);
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Sub(dest, src1, src2);
+            }
+            else
+            {
+                context.Assembler.FsubScalar(dest, src1, src2);
+            }
+        }
+
+        private static void GenerateTailcall(CodeGenContext context, Operation operation)
+        {
+            WriteEpilogue(context);
+
+            context.Assembler.Br(operation.GetSource(0));
+        }
+
+        private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            if (dest != default)
+            {
+                Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger());
+
+                OperandType destType = source.Type == OperandType.I64 ? OperandType.FP64 : OperandType.FP32;
+
+                context.Assembler.Fmov(Register(dest, destType), source, topHalf: false);
+            }
+        }
+
+        private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;  // Value
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Index
+
+            Debug.Assert(src1.Type == OperandType.V128);
+            Debug.Assert(src2.Kind == OperandKind.Constant);
+
+            byte index = src2.AsByte();
+
+            Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes());
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Umov(dest, src1, index, dest.Type == OperandType.I64 ? 3 : 2);
+            }
+            else
+            {
+                context.Assembler.DupScalar(dest, src1, index, dest.Type == OperandType.FP64 ? 3 : 2);
+            }
+        }
+
+        private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;  // Value
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Index
+
+            Debug.Assert(src1.Type == OperandType.V128);
+            Debug.Assert(src2.Kind == OperandKind.Constant);
+
+            byte index = src2.AsByte();
+
+            Debug.Assert(index < 8);
+
+            context.Assembler.Umov(dest, src1, index, 1);
+        }
+
+        private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;  // Value
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Index
+
+            Debug.Assert(src1.Type == OperandType.V128);
+            Debug.Assert(src2.Kind == OperandKind.Constant);
+
+            byte index = src2.AsByte();
+
+            Debug.Assert(index < 16);
+
+            context.Assembler.Umov(dest, src1, index, 0);
+        }
+
+        private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+            EnsureSameReg(dest, src1);
+
+            Debug.Assert(src1.Type == OperandType.V128);
+            Debug.Assert(src3.Kind == OperandKind.Constant);
+
+            byte index = src3.AsByte();
+
+            if (src2.Type.IsInteger())
+            {
+                context.Assembler.Ins(dest, src2, index, src2.Type == OperandType.I64 ? 3 : 2);
+            }
+            else
+            {
+                context.Assembler.Ins(dest, src2, 0, index, src2.Type == OperandType.FP64 ? 3 : 2);
+            }
+        }
+
+        private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+            EnsureSameReg(dest, src1);
+
+            Debug.Assert(src1.Type == OperandType.V128);
+            Debug.Assert(src3.Kind == OperandKind.Constant);
+
+            byte index = src3.AsByte();
+
+            context.Assembler.Ins(dest, src2, index, 1);
+        }
+
+        private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+            EnsureSameReg(dest, src1);
+
+            Debug.Assert(src1.Type == OperandType.V128);
+            Debug.Assert(src3.Kind == OperandKind.Constant);
+
+            byte index = src3.AsByte();
+
+            context.Assembler.Ins(dest, src2, index, 0);
+        }
+
+        private static void GenerateVectorOne(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+
+            Debug.Assert(!dest.Type.IsInteger());
+
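+            // Every element compares equal to itself, so CMEQ fills the register with ones.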
+            context.Assembler.CmeqVector(dest, dest, dest, 2);
+        }
+
+        private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+
+            Debug.Assert(!dest.Type.IsInteger());
+
+            context.Assembler.EorVector(dest, dest, dest);
+        }
+
+        private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
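+            // A scalar FMOV writes the low 64 bits and zeroes the upper 64 bits of the vector.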
+            context.Assembler.Fmov(Register(dest, OperandType.FP64), Register(source, OperandType.FP64));
+        }
+
+        private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+            context.Assembler.Fmov(Register(dest, OperandType.FP32), Register(source, OperandType.FP32));
+        }
+
+        private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+            context.Assembler.Uxth(dest, source);
+        }
+
+        private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+            // We can eliminate the move if source is already 32-bit and the registers are the same.
+            if (dest.Value == source.Value && source.Type == OperandType.I32)
+            {
+                return;
+            }
+
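+            // Writing a W (32-bit) register implicitly zero-extends into the full X register.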
+            context.Assembler.Mov(Register(dest.GetRegister().Index, OperandType.I32), source);
+        }
+
+        private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+            context.Assembler.Uxtb(dest, source);
+        }
+
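+        // Emits the function prologue: saves the used callee-saved registers, then reserves
+        // the local frame. From the final SP upwards: the outgoing call arguments region, the
+        // FP/LR save area (present when the function makes calls), the spill region and
+        // finally the callee saves.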
+        private static UnwindInfo WritePrologue(CodeGenContext context)
+        {
+            List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>();
+
+            Operand rsp = Register(SpRegister);
+
+            int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+            int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+            int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+            int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+
+            int calleeSaveRegionSize = Align16(intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8);
+
+            int offset = 0;
+
+            WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+            WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64);
+
+            int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize);
+            int outArgsSize = context.CallArgsRegionSize;
+
+            if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale))
+            {
+                if (context.HasCall)
+                {
+                    context.Assembler.StpRiPre(Register(FpRegister), Register(LrRegister), rsp, -localSize);
+                    context.Assembler.MovSp(Register(FpRegister), rsp);
+                }
+
+                if (outArgsSize != 0)
+                {
+                    context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, outArgsSize));
+                }
+            }
+            else
+            {
+                int frameSize = localSize + outArgsSize;
+                if (frameSize != 0)
+                {
+                    if (CodeGenCommon.ConstFitsOnUImm12(frameSize))
+                    {
+                        context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, frameSize));
+                    }
+                    else
+                    {
+                        Operand tempSize = Register(CodeGenCommon.ReservedRegister);
+                        GenerateConstantCopy(context, tempSize, (ulong)frameSize);
+                        context.Assembler.Sub(rsp, rsp, tempSize, ArmExtensionType.Uxtx);
+                    }
+                }
+
+                context.Assembler.StpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize);
+
+                if (outArgsSize != 0)
+                {
+                    context.Assembler.Add(Register(FpRegister), Register(SpRegister), Const(OperandType.I64, outArgsSize));
+                }
+                else
+                {
+                    context.Assembler.MovSp(Register(FpRegister), Register(SpRegister));
+                }
+            }
+
+            return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset);
+        }
+
+        private static void WritePrologueCalleeSavesPreIndexed(
+            CodeGenContext context,
+            List<UnwindPushEntry> pushEntries,
+            ref int mask,
+            ref int offset,
+            int calleeSaveRegionSize,
+            OperandType type)
+        {
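+            // With an odd register count, store one register alone first so the remainder can
+            // be stored in pairs. The store that hits offset 0 is pre-indexed, allocating the
+            // whole callee-save region in the same instruction.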
+            if ((BitOperations.PopCount((uint)mask) & 1) != 0)
+            {
+                int reg = BitOperations.TrailingZeroCount(mask);
+
+                pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg));
+
+                mask &= ~(1 << reg);
+
+                if (offset != 0)
+                {
+                    context.Assembler.StrRiUn(Register(reg, type), Register(SpRegister), offset);
+                }
+                else
+                {
+                    context.Assembler.StrRiPre(Register(reg, type), Register(SpRegister), -calleeSaveRegionSize);
+                }
+
+                offset += type.GetSizeInBytes();
+            }
+
+            while (mask != 0)
+            {
+                int reg = BitOperations.TrailingZeroCount(mask);
+
+                pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg));
+
+                mask &= ~(1 << reg);
+
+                int reg2 = BitOperations.TrailingZeroCount(mask);
+
+                pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg2));
+
+                mask &= ~(1 << reg2);
+
+                if (offset != 0)
+                {
+                    context.Assembler.StpRiUn(Register(reg, type), Register(reg2, type), Register(SpRegister), offset);
+                }
+                else
+                {
+                    context.Assembler.StpRiPre(Register(reg, type), Register(reg2, type), Register(SpRegister), -calleeSaveRegionSize);
+                }
+
+                offset += type.GetSizeInBytes() * 2;
+            }
+        }
+
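+        // Emits the function epilogue, undoing the prologue steps in reverse order.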
+        private static void WriteEpilogue(CodeGenContext context)
+        {
+            Operand rsp = Register(SpRegister);
+
+            int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize);
+            int outArgsSize = context.CallArgsRegionSize;
+
+            if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale))
+            {
+                if (outArgsSize != 0)
+                {
+                    context.Assembler.Add(rsp, rsp, Const(OperandType.I64, outArgsSize));
+                }
+
+                if (context.HasCall)
+                {
+                    context.Assembler.LdpRiPost(Register(FpRegister), Register(LrRegister), rsp, localSize);
+                }
+            }
+            else
+            {
+                if (context.HasCall)
+                {
+                    context.Assembler.LdpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize);
+                }
+
+                int frameSize = localSize + outArgsSize;
+                if (frameSize != 0)
+                {
+                    if (CodeGenCommon.ConstFitsOnUImm12(frameSize))
+                    {
+                        context.Assembler.Add(rsp, rsp, Const(OperandType.I64, frameSize));
+                    }
+                    else
+                    {
+                        Operand tempSize = Register(CodeGenCommon.ReservedRegister);
+                        GenerateConstantCopy(context, tempSize, (ulong)frameSize);
+                        context.Assembler.Add(rsp, rsp, tempSize, ArmExtensionType.Uxtx);
+                    }
+                }
+            }
+
+            int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+            int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+            int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+            int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+
+            int offset = intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8;
+            int calleeSaveRegionSize = Align16(offset);
+
+            WriteEpilogueCalleeSavesPostIndexed(context, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64);
+            WriteEpilogueCalleeSavesPostIndexed(context, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+        }
+
+        private static void WriteEpilogueCalleeSavesPostIndexed(
+            CodeGenContext context,
+            ref int mask,
+            ref int offset,
+            int calleeSaveRegionSize,
+            OperandType type)
+        {
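+            // Restore registers from the highest index down, mirroring the prologue stores.
+            // The load that reaches offset 0 is post-indexed, freeing the callee-save region.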
+            while (mask != 0)
+            {
+                int reg = BitUtils.HighestBitSet(mask);
+
+                mask &= ~(1 << reg);
+
+                if (mask != 0)
+                {
+                    int reg2 = BitUtils.HighestBitSet(mask);
+
+                    mask &= ~(1 << reg2);
+
+                    offset -= type.GetSizeInBytes() * 2;
+
+                    if (offset != 0)
+                    {
+                        context.Assembler.LdpRiUn(Register(reg2, type), Register(reg, type), Register(SpRegister), offset);
+                    }
+                    else
+                    {
+                        context.Assembler.LdpRiPost(Register(reg2, type), Register(reg, type), Register(SpRegister), calleeSaveRegionSize);
+                    }
+                }
+                else
+                {
+                    offset -= type.GetSizeInBytes();
+
+                    if (offset != 0)
+                    {
+                        context.Assembler.LdrRiUn(Register(reg, type), Register(SpRegister), offset);
+                    }
+                    else
+                    {
+                        context.Assembler.LdrRiPost(Register(reg, type), Register(SpRegister), calleeSaveRegionSize);
+                    }
+                }
+            }
+        }
+
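+        // Materializes a 64-bit constant using a MOVZ for the first non-zero halfword
+        // followed by a MOVK for each remaining one; zero is copied from the zero register.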
+        private static void GenerateConstantCopy(CodeGenContext context, Operand dest, ulong value)
+        {
+            if (value != 0)
+            {
+                int hw = 0;
+                bool first = true;
+
+                while (value != 0)
+                {
+                    int valueLow = (ushort)value;
+                    if (valueLow != 0)
+                    {
+                        if (first)
+                        {
+                            context.Assembler.Movz(dest, valueLow, hw);
+                            first = false;
+                        }
+                        else
+                        {
+                            context.Assembler.Movk(dest, valueLow, hw);
+                        }
+                    }
+
+                    hw++;
+                    value >>= 16;
+                }
+            }
+            else
+            {
+                context.Assembler.Mov(dest, Register(ZrRegister, dest.Type));
+            }
+        }
+
+        private static void GenerateAtomicCas(
+            CodeGenContext context,
+            Operand address,
+            Operand expected,
+            Operand desired,
+            Operand actual,
+            Operand result,
+            AccessSize accessSize)
+        {
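+            // Load-exclusive/store-exclusive compare-and-swap loop: load-acquire the current
+            // value, skip the store if it differs from the expected value, otherwise attempt
+            // a store-release of the desired value and retry while the exclusive store fails.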
+            int startOffset = context.StreamOffset;
+
+            switch (accessSize)
+            {
+                case AccessSize.Byte:
+                    context.Assembler.Ldaxrb(actual, address);
+                    break;
+                case AccessSize.Hword:
+                    context.Assembler.Ldaxrh(actual, address);
+                    break;
+                default:
+                    context.Assembler.Ldaxr(actual, address);
+                    break;
+            }
+
+            context.Assembler.Cmp(actual, expected);
+
+            context.JumpToNear(ArmCondition.Ne);
+
+            switch (accessSize)
+            {
+                case AccessSize.Byte:
+                    context.Assembler.Stlxrb(desired, address, result);
+                    break;
+                case AccessSize.Hword:
+                    context.Assembler.Stlxrh(desired, address, result);
+                    break;
+                default:
+                    context.Assembler.Stlxr(desired, address, result);
+                    break;
+            }
+
+            context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed.
+
+            context.JumpHere();
+
+            context.Assembler.Clrex();
+        }
+
+        private static void GenerateAtomicDcas(
+            CodeGenContext context,
+            Operand address,
+            Operand expectedLow,
+            Operand expectedHigh,
+            Operand desiredLow,
+            Operand desiredHigh,
+            Operand actualLow,
+            Operand actualHigh,
+            Operand temp0,
+            Operand temp1)
+        {
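+            // 128-bit variant of the CAS loop using LDAXP/STLXP. Each half is XORed with its
+            // expected value and the results are ORed together; a non-zero result means at
+            // least one half mismatched.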
+            int startOffset = context.StreamOffset;
+
+            context.Assembler.Ldaxp(actualLow, actualHigh, address);
+            context.Assembler.Eor(temp0, actualHigh, expectedHigh);
+            context.Assembler.Eor(temp1, actualLow, expectedLow);
+            context.Assembler.Orr(temp0, temp1, temp0);
+
+            context.JumpToNearIfNotZero(temp0);
+
+            Operand result = Register(temp0, OperandType.I32);
+
+            context.Assembler.Stlxp(desiredLow, desiredHigh, address, result);
+            context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed.
+
+            context.JumpHere();
+
+            context.Assembler.Clrex();
+        }
+
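+        // Attempts to fuse two adjacent loads or stores into a single LDP/STP. This only
+        // succeeds if both accesses use the same base register and value type, the accesses
+        // are contiguous, and the offset fits a scaled 7-bit signed immediate.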
+        private static bool TryPairMemoryOp(CodeGenContext context, Operation currentOp, Operation nextOp)
+        {
+            if (!TryGetMemOpBaseAndOffset(currentOp, out Operand op1Base, out int op1Offset))
+            {
+                return false;
+            }
+
+            if (!TryGetMemOpBaseAndOffset(nextOp, out Operand op2Base, out int op2Offset))
+            {
+                return false;
+            }
+
+            if (op1Base != op2Base)
+            {
+                return false;
+            }
+
+            OperandType valueType = GetMemOpValueType(currentOp);
+
+            if (valueType != GetMemOpValueType(nextOp) || op1Offset + valueType.GetSizeInBytes() != op2Offset)
+            {
+                return false;
+            }
+
+            if (!CodeGenCommon.ConstFitsOnSImm7(op1Offset, valueType.GetSizeInBytesLog2()))
+            {
+                return false;
+            }
+
+            if (currentOp.Instruction == Instruction.Load)
+            {
+                context.Assembler.LdpRiUn(currentOp.Destination, nextOp.Destination, op1Base, op1Offset);
+            }
+            else if (currentOp.Instruction == Instruction.Store)
+            {
+                context.Assembler.StpRiUn(currentOp.GetSource(1), nextOp.GetSource(1), op1Base, op1Offset);
+            }
+            else
+            {
+                return false;
+            }
+
+            return true;
+        }
+
+        private static bool IsLoadOrStore(Operation operation)
+        {
+            return operation.Instruction == Instruction.Load || operation.Instruction == Instruction.Store;
+        }
+
+        private static OperandType GetMemOpValueType(Operation operation)
+        {
+            if (operation.Destination != default)
+            {
+                return operation.Destination.Type;
+            }
+
+            return operation.GetSource(1).Type;
+        }
+
+        private static bool TryGetMemOpBaseAndOffset(Operation operation, out Operand baseAddress, out int offset)
+        {
+            baseAddress = default;
+            offset = 0;
+            Operand address = operation.GetSource(0);
+
+            if (address.Kind != OperandKind.Memory)
+            {
+                return false;
+            }
+
+            MemoryOperand memOp = address.GetMemory();
+            Operand baseOp = memOp.BaseAddress;
+
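+            // A missing base can be replaced by an unscaled index register; an operand with
+            // both base and index set can't be expressed by LDP/STP and is rejected.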
+            if (baseOp == default)
+            {
+                baseOp = memOp.Index;
+
+                if (baseOp == default || memOp.Scale != Multiplier.x1)
+                {
+                    return false;
+                }
+            }
+            else if (memOp.Index != default)
+            {
+                return false;
+            }
+
+            baseAddress = baseOp;
+            offset = memOp.Displacement;
+
+            return true;
+        }
+
+        private static Operand Register(Operand operand, OperandType type = OperandType.I64)
+        {
+            return Register(operand.GetRegister().Index, type);
+        }
+
+        private static Operand Register(int register, OperandType type = OperandType.I64)
+        {
+            return Factory.Register(register, RegisterType.Integer, type);
+        }
+
+        private static int Align16(int value)
+        {
+            return (value + 0xf) & ~0xf;
+        }
+
+        [Conditional("DEBUG")]
+        private static void ValidateUnOp(Operand dest, Operand source)
+        {
+            // Destination and source aren't forced to be equal.
+            // EnsureSameReg(dest, source);
+            EnsureSameType(dest, source);
+        }
+
+        [Conditional("DEBUG")]
+        private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+        {
+            // Destination and source aren't forced to be equal.
+            // EnsureSameReg(dest, src1);
+            EnsureSameType(dest, src1, src2);
+        }
+
+        [Conditional("DEBUG")]
+        private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+        {
+            // Destination and source aren't forced to be equal.
+            // EnsureSameReg(dest, src1);
+            EnsureSameType(dest, src1);
+
+            Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
+        }
+
+        private static void EnsureSameReg(Operand op1, Operand op2)
+        {
+            Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+            Debug.Assert(op1.Kind == op2.Kind);
+            Debug.Assert(op1.Value == op2.Value);
+        }
+
+        private static void EnsureSameType(Operand op1, Operand op2)
+        {
+            Debug.Assert(op1.Type == op2.Type);
+        }
+
+        private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+        {
+            Debug.Assert(op1.Type == op2.Type);
+            Debug.Assert(op1.Type == op3.Type);
+        }
+
+        private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+        {
+            Debug.Assert(op1.Type == op2.Type);
+            Debug.Assert(op1.Type == op3.Type);
+            Debug.Assert(op1.Type == op4.Type);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs b/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
new file mode 100644
index 0000000000..aaa00bb653
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
@@ -0,0 +1,662 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    static class CodeGeneratorIntrinsic
+    {
+        public static void GenerateOperation(CodeGenContext context, Operation operation)
+        {
+            Intrinsic intrin = operation.Intrinsic;
+
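+            // The vector arrangement (type and element size) is packed into the intrinsic
+            // value itself; mask it off before looking up the base instruction encoding.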
+            IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
+            switch (info.Type)
+            {
+                case IntrinsicType.ScalarUnary:
+                    GenerateVectorUnary(
+                        context,
+                        0,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0));
+                    break;
+                case IntrinsicType.ScalarUnaryByElem:
+                    Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+                    GenerateVectorUnaryByElem(
+                        context,
+                        0,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        (uint)operation.GetSource(1).AsInt32(),
+                        operation.Destination,
+                        operation.GetSource(0));
+                    break;
+                case IntrinsicType.ScalarBinary:
+                    GenerateVectorBinary(
+                        context,
+                        0,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        operation.GetSource(1));
+                    break;
+                case IntrinsicType.ScalarBinaryFPByElem:
+                    Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryFPByElem(
+                        context,
+                        0,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        (uint)operation.GetSource(2).AsInt32(),
+                        operation.Destination,
+                        operation.GetSource(0),
+                        operation.GetSource(1));
+                    break;
+                case IntrinsicType.ScalarBinaryRd:
+                    GenerateVectorUnary(
+                        context,
+                        0,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(1));
+                    break;
+                case IntrinsicType.ScalarBinaryShl:
+                    Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryShlImm(
+                        context,
+                        0,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        (uint)operation.GetSource(1).AsInt32());
+                    break;
+                case IntrinsicType.ScalarBinaryShr:
+                    Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryShrImm(
+                        context,
+                        0,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        (uint)operation.GetSource(1).AsInt32());
+                    break;
+                case IntrinsicType.ScalarFPCompare:
+                    GenerateScalarFPCompare(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        operation.GetSource(1));
+                    break;
+                case IntrinsicType.ScalarFPConvFixed:
+                    Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryShrImm(
+                        context,
+                        0,
+                        ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        (uint)operation.GetSource(1).AsInt32());
+                    break;
+                case IntrinsicType.ScalarFPConvFixedGpr:
+                    Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+                    GenerateScalarFPConvGpr(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        (uint)operation.GetSource(1).AsInt32());
+                    break;
+                case IntrinsicType.ScalarFPConvGpr:
+                    GenerateScalarFPConvGpr(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0));
+                    break;
+                case IntrinsicType.ScalarTernary:
+                    GenerateScalarTernary(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(1),
+                        operation.GetSource(2),
+                        operation.GetSource(0));
+                    break;
+                case IntrinsicType.ScalarTernaryFPRdByElem:
+                    Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryFPByElem(
+                        context,
+                        0,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        (uint)operation.GetSource(3).AsInt32(),
+                        operation.Destination,
+                        operation.GetSource(1),
+                        operation.GetSource(2));
+                    break;
+                case IntrinsicType.ScalarTernaryShlRd:
+                    Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryShlImm(
+                        context,
+                        0,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(1),
+                        (uint)operation.GetSource(2).AsInt32());
+                    break;
+                case IntrinsicType.ScalarTernaryShrRd:
+                    Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryShrImm(
+                        context,
+                        0,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(1),
+                        (uint)operation.GetSource(2).AsInt32());
+                    break;
+
+                case IntrinsicType.VectorUnary:
+                    GenerateVectorUnary(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0));
+                    break;
+                case IntrinsicType.VectorUnaryByElem:
+                    Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+                    GenerateVectorUnaryByElem(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        (uint)operation.GetSource(1).AsInt32(),
+                        operation.Destination,
+                        operation.GetSource(0));
+                    break;
+                case IntrinsicType.VectorBinary:
+                    GenerateVectorBinary(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        operation.GetSource(1));
+                    break;
+                case IntrinsicType.VectorBinaryBitwise:
+                    GenerateVectorBinary(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        operation.GetSource(1));
+                    break;
+                case IntrinsicType.VectorBinaryByElem:
+                    Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryByElem(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        (uint)operation.GetSource(2).AsInt32(),
+                        operation.Destination,
+                        operation.GetSource(0),
+                        operation.GetSource(1));
+                    break;
+                case IntrinsicType.VectorBinaryFPByElem:
+                    Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryFPByElem(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        (uint)operation.GetSource(2).AsInt32(),
+                        operation.Destination,
+                        operation.GetSource(0),
+                        operation.GetSource(1));
+                    break;
+                case IntrinsicType.VectorBinaryRd:
+                    GenerateVectorUnary(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(1));
+                    break;
+                case IntrinsicType.VectorBinaryShl:
+                    Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryShlImm(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        (uint)operation.GetSource(1).AsInt32());
+                    break;
+                case IntrinsicType.VectorBinaryShr:
+                    Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryShrImm(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        (uint)operation.GetSource(1).AsInt32());
+                    break;
+                case IntrinsicType.VectorFPConvFixed:
+                    Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryShrImm(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        (uint)operation.GetSource(1).AsInt32());
+                    break;
+                case IntrinsicType.VectorInsertByElem:
+                    Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+                    Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+                    GenerateVectorInsertByElem(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        (uint)operation.GetSource(3).AsInt32(),
+                        (uint)operation.GetSource(1).AsInt32(),
+                        operation.Destination,
+                        operation.GetSource(2));
+                    break;
+                case IntrinsicType.VectorLookupTable:
+                    Debug.Assert((uint)(operation.SourcesCount - 2) <= 3);
+
+                    for (int i = 1; i < operation.SourcesCount - 1; i++)
+                    {
+                        Register currReg = operation.GetSource(i).GetRegister();
+                        Register prevReg = operation.GetSource(i - 1).GetRegister();
+
+                        Debug.Assert(prevReg.Index + 1 == currReg.Index && currReg.Type == RegisterType.Vector);
+                    }
+
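+                    // The len field (bits 14:13) holds the number of table registers minus
+                    // one; the registers themselves must be consecutive, as asserted above.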
+                    GenerateVectorBinary(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        info.Inst | ((uint)(operation.SourcesCount - 2) << 13),
+                        operation.Destination,
+                        operation.GetSource(0),
+                        operation.GetSource(operation.SourcesCount - 1));
+                    break;
+                case IntrinsicType.VectorTernaryFPRdByElem:
+                    Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryFPByElem(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        (uint)operation.GetSource(3).AsInt32(),
+                        operation.Destination,
+                        operation.GetSource(1),
+                        operation.GetSource(2));
+                    break;
+                case IntrinsicType.VectorTernaryRd:
+                    GenerateVectorBinary(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(1),
+                        operation.GetSource(2));
+                    break;
+                case IntrinsicType.VectorTernaryRdBitwise:
+                    GenerateVectorBinary(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(1),
+                        operation.GetSource(2));
+                    break;
+                case IntrinsicType.VectorTernaryRdByElem:
+                    Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryByElem(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        (uint)operation.GetSource(3).AsInt32(),
+                        operation.Destination,
+                        operation.GetSource(1),
+                        operation.GetSource(2));
+                    break;
+                case IntrinsicType.VectorTernaryShlRd:
+                    Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryShlImm(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(1),
+                        (uint)operation.GetSource(2).AsInt32());
+                    break;
+                case IntrinsicType.VectorTernaryShrRd:
+                    Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+                    GenerateVectorBinaryShrImm(
+                        context,
+                        (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+                        (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(1),
+                        (uint)operation.GetSource(2).AsInt32());
+                    break;
+
+                case IntrinsicType.GetRegister:
+                    context.Assembler.WriteInstruction(info.Inst, operation.Destination);
+                    break;
+                case IntrinsicType.SetRegister:
+                    context.Assembler.WriteInstruction(info.Inst, operation.GetSource(0));
+                    break;
+
+                default:
+                    throw new NotImplementedException(info.Type.ToString());
+            }
+        }
+
+        private static void GenerateScalarFPCompare(
+            CodeGenContext context,
+            uint sz,
+            uint instruction,
+            Operand dest,
+            Operand rn,
+            Operand rm)
+        {
+            instruction |= (sz << 22);
+
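+            // Comparing against constant zero selects the compare-with-zero encoding; the
+            // NZCV flags produced by the compare are read back into the destination via MRS.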
+            if (rm.Kind == OperandKind.Constant && rm.Value == 0)
+            {
+                instruction |= 0b1000;
+                rm = rn;
+            }
+
+            context.Assembler.WriteInstructionRm16NoRet(instruction, rn, rm);
+            context.Assembler.Mrs(dest, 1, 3, 4, 2, 0);
+        }
+
+        private static void GenerateScalarFPConvGpr(
+            CodeGenContext context,
+            uint sz,
+            uint instruction,
+            Operand rd,
+            Operand rn)
+        {
+            instruction |= (sz << 22);
+
+            if (rd.Type.IsInteger())
+            {
+                context.Assembler.WriteInstructionAuto(instruction, rd, rn);
+            }
+            else
+            {
+                if (rn.Type == OperandType.I64)
+                {
+                    instruction |= Assembler.SfFlag;
+                }
+
+                context.Assembler.WriteInstruction(instruction, rd, rn);
+            }
+        }
+
+        private static void GenerateScalarFPConvGpr(
+            CodeGenContext context,
+            uint sz,
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            uint fBits)
+        {
+            Debug.Assert(fBits <= 64);
+
+            instruction |= (sz << 22);
+            instruction |= (64 - fBits) << 10;
+
+            if (rd.Type.IsInteger())
+            {
+                Debug.Assert(rd.Type != OperandType.I32 || fBits <= 32);
+
+                context.Assembler.WriteInstructionAuto(instruction, rd, rn);
+            }
+            else
+            {
+                if (rn.Type == OperandType.I64)
+                {
+                    instruction |= Assembler.SfFlag;
+                }
+                else
+                {
+                    Debug.Assert(fBits <= 32);
+                }
+
+                context.Assembler.WriteInstruction(instruction, rd, rn);
+            }
+        }
+
+        private static void GenerateScalarTernary(
+            CodeGenContext context,
+            uint sz,
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            Operand rm,
+            Operand ra)
+        {
+            instruction |= (sz << 22);
+
+            context.Assembler.WriteInstruction(instruction, rd, rn, rm, ra);
+        }
+
+        private static void GenerateVectorUnary(
+            CodeGenContext context,
+            uint q,
+            uint sz,
+            uint instruction,
+            Operand rd,
+            Operand rn)
+        {
+            instruction |= (q << 30) | (sz << 22);
+
+            context.Assembler.WriteInstruction(instruction, rd, rn);
+        }
+
+        private static void GenerateVectorUnaryByElem(
+            CodeGenContext context,
+            uint q,
+            uint sz,
+            uint instruction,
+            uint srcIndex,
+            Operand rd,
+            Operand rn)
+        {
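+            // imm5 encodes both the element size (the position of the lowest set bit) and the
+            // source element index (the bits above it).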
+            uint imm5 = (srcIndex << ((int)sz + 1)) | (1u << (int)sz);
+
+            instruction |= (q << 30) | (imm5 << 16);
+
+            context.Assembler.WriteInstruction(instruction, rd, rn);
+        }
+
+        private static void GenerateVectorBinary(
+            CodeGenContext context,
+            uint q,
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            Operand rm)
+        {
+            instruction |= (q << 30);
+
+            context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+        }
+
+        private static void GenerateVectorBinary(
+            CodeGenContext context,
+            uint q,
+            uint sz,
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            Operand rm)
+        {
+            instruction |= (q << 30) | (sz << 22);
+
+            context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+        }
+
+        private static void GenerateVectorBinaryByElem(
+            CodeGenContext context,
+            uint q,
+            uint size,
+            uint instruction,
+            uint srcIndex,
+            Operand rd,
+            Operand rn,
+            Operand rm)
+        {
+            instruction |= (q << 30) | (size << 22);
+
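+            // The element index doesn't fit in one field: it is split across the H:L:M bits,
+            // whose layout depends on the element size.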
+            if (size == 2)
+            {
+                instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
+            }
+            else
+            {
+                instruction |= ((srcIndex & 3) << 20) | ((srcIndex & 4) << 9);
+            }
+
+            context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+        }
+
+        private static void GenerateVectorBinaryFPByElem(
+            CodeGenContext context,
+            uint q,
+            uint sz,
+            uint instruction,
+            uint srcIndex,
+            Operand rd,
+            Operand rn,
+            Operand rm)
+        {
+            instruction |= (q << 30) | (sz << 22);
+
+            if (sz != 0)
+            {
+                instruction |= (srcIndex & 1) << 11;
+            }
+            else
+            {
+                instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
+            }
+
+            context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+        }
+
+        private static void GenerateVectorBinaryShlImm(
+            CodeGenContext context,
+            uint q,
+            uint sz,
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            uint shift)
+        {
+            instruction |= (q << 30);
+
+            Debug.Assert(shift >= 0 && shift < (8u << (int)sz));
+
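+            // immh:immb encodes the element size (the leading one bit) with the left-shift
+            // amount in the bits below it.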
+            uint imm = (8u << (int)sz) | (shift & (0x3fu >> (int)(3 - sz)));
+
+            instruction |= (imm << 16);
+
+            context.Assembler.WriteInstruction(instruction, rd, rn);
+        }
+
+        private static void GenerateVectorBinaryShrImm(
+            CodeGenContext context,
+            uint q,
+            uint sz,
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            uint shift)
+        {
+            instruction |= (q << 30);
+
+            Debug.Assert(shift > 0 && shift <= (8u << (int)sz));
+
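+            // Right shifts are encoded in immh:immb as (2 * esize) - shift, keeping the
+            // leading element-size bit set.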
+            uint imm = (8u << (int)sz) | ((8u << (int)sz) - shift);
+
+            instruction |= (imm << 16);
+
+            context.Assembler.WriteInstruction(instruction, rd, rn);
+        }
+
+        private static void GenerateVectorInsertByElem(
+            CodeGenContext context,
+            uint sz,
+            uint instruction,
+            uint srcIndex,
+            uint dstIndex,
+            Operand rd,
+            Operand rn)
+        {
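+            // INS (element): imm5 selects the destination element and its size, imm4 the
+            // source element.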
+            uint imm4 = srcIndex << (int)sz;
+            uint imm5 = (dstIndex << ((int)sz + 1)) | (1u << (int)sz);
+
+            instruction |= imm4 << 11;
+            instruction |= imm5 << 16;
+
+            context.Assembler.WriteInstruction(instruction, rd, rn);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
new file mode 100644
index 0000000000..8695db9035
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+    struct IntrinsicInfo
+    {
+        public uint          Inst { get; }
+        public IntrinsicType Type { get; }
+
+        public IntrinsicInfo(uint inst, IntrinsicType type)
+        {
+            Inst = inst;
+            Type = type;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
new file mode 100644
index 0000000000..53ef152e57
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
@@ -0,0 +1,461 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    static class IntrinsicTable
+    {
+        private static IntrinsicInfo[] _intrinTable;
+
+        static IntrinsicTable()
+        {
+            _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
+
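+            // Each entry pairs an intrinsic with its base instruction encoding and the
+            // encoder type used to emit it.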
+            Add(Intrinsic.Arm64AbsS,          new IntrinsicInfo(0x5e20b800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64AbsV,          new IntrinsicInfo(0x0e20b800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64AddhnV,        new IntrinsicInfo(0x0e204000u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64AddpS,         new IntrinsicInfo(0x5e31b800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64AddpV,         new IntrinsicInfo(0x0e20bc00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64AddvV,         new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64AddS,          new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64AddV,          new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64AesdV,         new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary));
+            Add(Intrinsic.Arm64AeseV,         new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary));
+            Add(Intrinsic.Arm64AesimcV,       new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
+            Add(Intrinsic.Arm64AesmcV,        new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
+            Add(Intrinsic.Arm64AndV,          new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));
+            Add(Intrinsic.Arm64BicVi,         new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorBinaryBitwiseImm));
+            Add(Intrinsic.Arm64BicV,          new IntrinsicInfo(0x0e601c00u, IntrinsicType.VectorBinaryBitwise));
+            Add(Intrinsic.Arm64BifV,          new IntrinsicInfo(0x2ee01c00u, IntrinsicType.VectorTernaryRdBitwise));
+            Add(Intrinsic.Arm64BitV,          new IntrinsicInfo(0x2ea01c00u, IntrinsicType.VectorTernaryRdBitwise));
+            Add(Intrinsic.Arm64BslV,          new IntrinsicInfo(0x2e601c00u, IntrinsicType.VectorTernaryRdBitwise));
+            Add(Intrinsic.Arm64ClsV,          new IntrinsicInfo(0x0e204800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64ClzV,          new IntrinsicInfo(0x2e204800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64CmeqS,         new IntrinsicInfo(0x7e208c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64CmeqV,         new IntrinsicInfo(0x2e208c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64CmeqSz,        new IntrinsicInfo(0x5e209800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64CmeqVz,        new IntrinsicInfo(0x0e209800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64CmgeS,         new IntrinsicInfo(0x5e203c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64CmgeV,         new IntrinsicInfo(0x0e203c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64CmgeSz,        new IntrinsicInfo(0x7e208800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64CmgeVz,        new IntrinsicInfo(0x2e208800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64CmgtS,         new IntrinsicInfo(0x5e203400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64CmgtV,         new IntrinsicInfo(0x0e203400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64CmgtSz,        new IntrinsicInfo(0x5e208800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64CmgtVz,        new IntrinsicInfo(0x0e208800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64CmhiS,         new IntrinsicInfo(0x7e203400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64CmhiV,         new IntrinsicInfo(0x2e203400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64CmhsS,         new IntrinsicInfo(0x7e203c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64CmhsV,         new IntrinsicInfo(0x2e203c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64CmleSz,        new IntrinsicInfo(0x7e209800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64CmleVz,        new IntrinsicInfo(0x2e209800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64CmltSz,        new IntrinsicInfo(0x5e20a800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64CmltVz,        new IntrinsicInfo(0x0e20a800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64CmtstS,        new IntrinsicInfo(0x5e208c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64CmtstV,        new IntrinsicInfo(0x0e208c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64CntV,          new IntrinsicInfo(0x0e205800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64DupSe,         new IntrinsicInfo(0x5e000400u, IntrinsicType.ScalarUnaryByElem));
+            Add(Intrinsic.Arm64DupVe,         new IntrinsicInfo(0x0e000400u, IntrinsicType.VectorUnaryByElem));
+            Add(Intrinsic.Arm64DupGp,         new IntrinsicInfo(0x0e000c00u, IntrinsicType.VectorUnaryByElem));
+            Add(Intrinsic.Arm64EorV,          new IntrinsicInfo(0x2e201c00u, IntrinsicType.VectorBinaryBitwise));
+            Add(Intrinsic.Arm64ExtV,          new IntrinsicInfo(0x2e000000u, IntrinsicType.VectorExt));
+            Add(Intrinsic.Arm64FabdS,         new IntrinsicInfo(0x7ea0d400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FabdV,         new IntrinsicInfo(0x2ea0d400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FabsV,         new IntrinsicInfo(0x0ea0f800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FabsS,         new IntrinsicInfo(0x1e20c000u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FacgeS,        new IntrinsicInfo(0x7e20ec00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FacgeV,        new IntrinsicInfo(0x2e20ec00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FacgtS,        new IntrinsicInfo(0x7ea0ec00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FacgtV,        new IntrinsicInfo(0x2ea0ec00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FaddpS,        new IntrinsicInfo(0x7e30d800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FaddpV,        new IntrinsicInfo(0x2e20d400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FaddV,         new IntrinsicInfo(0x0e20d400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FaddS,         new IntrinsicInfo(0x1e202800u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FccmpeS,       new IntrinsicInfo(0x1e200410u, IntrinsicType.ScalarFPCompareCond));
+            Add(Intrinsic.Arm64FccmpS,        new IntrinsicInfo(0x1e200400u, IntrinsicType.ScalarFPCompareCond));
+            Add(Intrinsic.Arm64FcmeqS,        new IntrinsicInfo(0x5e20e400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FcmeqV,        new IntrinsicInfo(0x0e20e400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FcmeqSz,       new IntrinsicInfo(0x5ea0d800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcmeqVz,       new IntrinsicInfo(0x0ea0d800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcmgeS,        new IntrinsicInfo(0x7e20e400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FcmgeV,        new IntrinsicInfo(0x2e20e400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FcmgeSz,       new IntrinsicInfo(0x7ea0c800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcmgeVz,       new IntrinsicInfo(0x2ea0c800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcmgtS,        new IntrinsicInfo(0x7ea0e400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FcmgtV,        new IntrinsicInfo(0x2ea0e400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FcmgtSz,       new IntrinsicInfo(0x5ea0c800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcmgtVz,       new IntrinsicInfo(0x0ea0c800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcmleSz,       new IntrinsicInfo(0x7ea0d800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcmleVz,       new IntrinsicInfo(0x2ea0d800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcmltSz,       new IntrinsicInfo(0x5ea0e800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcmltVz,       new IntrinsicInfo(0x0ea0e800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcmpeS,        new IntrinsicInfo(0x1e202010u, IntrinsicType.ScalarFPCompare));
+            Add(Intrinsic.Arm64FcmpS,         new IntrinsicInfo(0x1e202000u, IntrinsicType.ScalarFPCompare));
+            Add(Intrinsic.Arm64FcselS,        new IntrinsicInfo(0x1e200c00u, IntrinsicType.ScalarFcsel));
+            Add(Intrinsic.Arm64FcvtasS,       new IntrinsicInfo(0x5e21c800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcvtasV,       new IntrinsicInfo(0x0e21c800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtasGp,      new IntrinsicInfo(0x1e240000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64FcvtauS,       new IntrinsicInfo(0x7e21c800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcvtauV,       new IntrinsicInfo(0x2e21c800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtauGp,      new IntrinsicInfo(0x1e250000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64FcvtlV,        new IntrinsicInfo(0x0e217800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtmsS,       new IntrinsicInfo(0x5e21b800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcvtmsV,       new IntrinsicInfo(0x0e21b800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtmsGp,      new IntrinsicInfo(0x1e300000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64FcvtmuS,       new IntrinsicInfo(0x7e21b800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcvtmuV,       new IntrinsicInfo(0x2e21b800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtmuGp,      new IntrinsicInfo(0x1e310000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64FcvtnsS,       new IntrinsicInfo(0x5e21a800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcvtnsV,       new IntrinsicInfo(0x0e21a800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtnsGp,      new IntrinsicInfo(0x1e200000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64FcvtnuS,       new IntrinsicInfo(0x7e21a800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcvtnuV,       new IntrinsicInfo(0x2e21a800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtnuGp,      new IntrinsicInfo(0x1e210000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64FcvtnV,        new IntrinsicInfo(0x0e216800u, IntrinsicType.VectorBinaryRd));
+            Add(Intrinsic.Arm64FcvtpsS,       new IntrinsicInfo(0x5ea1a800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcvtpsV,       new IntrinsicInfo(0x0ea1a800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtpsGp,      new IntrinsicInfo(0x1e280000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64FcvtpuS,       new IntrinsicInfo(0x7ea1a800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcvtpuV,       new IntrinsicInfo(0x2ea1a800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtpuGp,      new IntrinsicInfo(0x1e290000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64FcvtxnS,       new IntrinsicInfo(0x7e216800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcvtxnV,       new IntrinsicInfo(0x2e216800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtzsSFixed,  new IntrinsicInfo(0x5f00fc00u, IntrinsicType.ScalarFPConvFixed));
+            Add(Intrinsic.Arm64FcvtzsVFixed,  new IntrinsicInfo(0x0f00fc00u, IntrinsicType.VectorFPConvFixed));
+            Add(Intrinsic.Arm64FcvtzsS,       new IntrinsicInfo(0x5ea1b800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcvtzsV,       new IntrinsicInfo(0x0ea1b800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtzsGpFixed, new IntrinsicInfo(0x1e180000u, IntrinsicType.ScalarFPConvFixedGpr));
+            Add(Intrinsic.Arm64FcvtzsGp,      new IntrinsicInfo(0x1e380000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64FcvtzuSFixed,  new IntrinsicInfo(0x7f00fc00u, IntrinsicType.ScalarFPConvFixed));
+            Add(Intrinsic.Arm64FcvtzuVFixed,  new IntrinsicInfo(0x2f00fc00u, IntrinsicType.VectorFPConvFixed));
+            Add(Intrinsic.Arm64FcvtzuS,       new IntrinsicInfo(0x7ea1b800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FcvtzuV,       new IntrinsicInfo(0x2ea1b800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FcvtzuGpFixed, new IntrinsicInfo(0x1e190000u, IntrinsicType.ScalarFPConvFixedGpr));
+            Add(Intrinsic.Arm64FcvtzuGp,      new IntrinsicInfo(0x1e390000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64FcvtS,         new IntrinsicInfo(0x1e224000u, IntrinsicType.ScalarFPConv));
+            Add(Intrinsic.Arm64FdivV,         new IntrinsicInfo(0x2e20fc00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FdivS,         new IntrinsicInfo(0x1e201800u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FmaddS,        new IntrinsicInfo(0x1f000000u, IntrinsicType.ScalarTernary));
+            Add(Intrinsic.Arm64FmaxnmpS,      new IntrinsicInfo(0x7e30c800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FmaxnmpV,      new IntrinsicInfo(0x2e20c400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FmaxnmvV,      new IntrinsicInfo(0x2e30c800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FmaxnmV,       new IntrinsicInfo(0x0e20c400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FmaxnmS,       new IntrinsicInfo(0x1e206800u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FmaxpS,        new IntrinsicInfo(0x7e30f800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FmaxpV,        new IntrinsicInfo(0x2e20f400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FmaxvV,        new IntrinsicInfo(0x2e30f800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FmaxV,         new IntrinsicInfo(0x0e20f400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FmaxS,         new IntrinsicInfo(0x1e204800u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FminnmpS,      new IntrinsicInfo(0x7eb0c800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FminnmpV,      new IntrinsicInfo(0x2ea0c400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FminnmvV,      new IntrinsicInfo(0x2eb0c800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FminnmV,       new IntrinsicInfo(0x0ea0c400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FminnmS,       new IntrinsicInfo(0x1e207800u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FminpS,        new IntrinsicInfo(0x7eb0f800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FminpV,        new IntrinsicInfo(0x2ea0f400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FminvV,        new IntrinsicInfo(0x2eb0f800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FminV,         new IntrinsicInfo(0x0ea0f400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FminS,         new IntrinsicInfo(0x1e205800u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FmlaSe,        new IntrinsicInfo(0x5f801000u, IntrinsicType.ScalarTernaryFPRdByElem));
+            Add(Intrinsic.Arm64FmlaVe,        new IntrinsicInfo(0x0f801000u, IntrinsicType.VectorTernaryFPRdByElem));
+            Add(Intrinsic.Arm64FmlaV,         new IntrinsicInfo(0x0e20cc00u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64FmlsSe,        new IntrinsicInfo(0x5f805000u, IntrinsicType.ScalarTernaryFPRdByElem));
+            Add(Intrinsic.Arm64FmlsVe,        new IntrinsicInfo(0x0f805000u, IntrinsicType.VectorTernaryFPRdByElem));
+            Add(Intrinsic.Arm64FmlsV,         new IntrinsicInfo(0x0ea0cc00u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64FmovVi,        new IntrinsicInfo(0x0f00f400u, IntrinsicType.VectorFmovi));
+            Add(Intrinsic.Arm64FmovS,         new IntrinsicInfo(0x1e204000u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FmovGp,        new IntrinsicInfo(0x1e260000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64FmovSi,        new IntrinsicInfo(0x1e201000u, IntrinsicType.ScalarFmovi));
+            Add(Intrinsic.Arm64FmsubS,        new IntrinsicInfo(0x1f008000u, IntrinsicType.ScalarTernary));
+            Add(Intrinsic.Arm64FmulxSe,       new IntrinsicInfo(0x7f809000u, IntrinsicType.ScalarBinaryFPByElem));
+            Add(Intrinsic.Arm64FmulxVe,       new IntrinsicInfo(0x2f809000u, IntrinsicType.VectorBinaryFPByElem));
+            Add(Intrinsic.Arm64FmulxS,        new IntrinsicInfo(0x5e20dc00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FmulxV,        new IntrinsicInfo(0x0e20dc00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FmulSe,        new IntrinsicInfo(0x5f809000u, IntrinsicType.ScalarBinaryFPByElem));
+            Add(Intrinsic.Arm64FmulVe,        new IntrinsicInfo(0x0f809000u, IntrinsicType.VectorBinaryFPByElem));
+            Add(Intrinsic.Arm64FmulV,         new IntrinsicInfo(0x2e20dc00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FmulS,         new IntrinsicInfo(0x1e200800u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FnegV,         new IntrinsicInfo(0x2ea0f800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FnegS,         new IntrinsicInfo(0x1e214000u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FnmaddS,       new IntrinsicInfo(0x1f200000u, IntrinsicType.ScalarTernary));
+            Add(Intrinsic.Arm64FnmsubS,       new IntrinsicInfo(0x1f208000u, IntrinsicType.ScalarTernary));
+            Add(Intrinsic.Arm64FnmulS,        new IntrinsicInfo(0x1e208800u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FrecpeS,       new IntrinsicInfo(0x5ea1d800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FrecpeV,       new IntrinsicInfo(0x0ea1d800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FrecpsS,       new IntrinsicInfo(0x5e20fc00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FrecpsV,       new IntrinsicInfo(0x0e20fc00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FrecpxS,       new IntrinsicInfo(0x5ea1f800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FrintaV,       new IntrinsicInfo(0x2e218800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FrintaS,       new IntrinsicInfo(0x1e264000u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FrintiV,       new IntrinsicInfo(0x2ea19800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FrintiS,       new IntrinsicInfo(0x1e27c000u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FrintmV,       new IntrinsicInfo(0x0e219800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FrintmS,       new IntrinsicInfo(0x1e254000u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FrintnV,       new IntrinsicInfo(0x0e218800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FrintnS,       new IntrinsicInfo(0x1e244000u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FrintpV,       new IntrinsicInfo(0x0ea18800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FrintpS,       new IntrinsicInfo(0x1e24c000u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FrintxV,       new IntrinsicInfo(0x2e219800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FrintxS,       new IntrinsicInfo(0x1e274000u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FrintzV,       new IntrinsicInfo(0x0ea19800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FrintzS,       new IntrinsicInfo(0x1e25c000u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FrsqrteS,      new IntrinsicInfo(0x7ea1d800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FrsqrteV,      new IntrinsicInfo(0x2ea1d800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FrsqrtsS,      new IntrinsicInfo(0x5ea0fc00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64FrsqrtsV,      new IntrinsicInfo(0x0ea0fc00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FsqrtV,        new IntrinsicInfo(0x2ea1f800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64FsqrtS,        new IntrinsicInfo(0x1e21c000u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64FsubV,         new IntrinsicInfo(0x0ea0d400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64FsubS,         new IntrinsicInfo(0x1e203800u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64InsVe,         new IntrinsicInfo(0x6e000400u, IntrinsicType.VectorInsertByElem));
+            Add(Intrinsic.Arm64InsGp,         new IntrinsicInfo(0x4e001c00u, IntrinsicType.ScalarUnaryByElem));
+            Add(Intrinsic.Arm64Ld1rV,         new IntrinsicInfo(0x0d40c000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64Ld1Vms,        new IntrinsicInfo(0x0c402000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64Ld1Vss,        new IntrinsicInfo(0x0d400000u, IntrinsicType.VectorLdStSs));
+            Add(Intrinsic.Arm64Ld2rV,         new IntrinsicInfo(0x0d60c000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64Ld2Vms,        new IntrinsicInfo(0x0c408000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64Ld2Vss,        new IntrinsicInfo(0x0d600000u, IntrinsicType.VectorLdStSs));
+            Add(Intrinsic.Arm64Ld3rV,         new IntrinsicInfo(0x0d40e000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64Ld3Vms,        new IntrinsicInfo(0x0c404000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64Ld3Vss,        new IntrinsicInfo(0x0d402000u, IntrinsicType.VectorLdStSs));
+            Add(Intrinsic.Arm64Ld4rV,         new IntrinsicInfo(0x0d60e000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64Ld4Vms,        new IntrinsicInfo(0x0c400000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64Ld4Vss,        new IntrinsicInfo(0x0d602000u, IntrinsicType.VectorLdStSs));
+            Add(Intrinsic.Arm64MlaVe,         new IntrinsicInfo(0x2f000000u, IntrinsicType.VectorTernaryRdByElem));
+            Add(Intrinsic.Arm64MlaV,          new IntrinsicInfo(0x0e209400u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64MlsVe,         new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem));
+            Add(Intrinsic.Arm64MlsV,          new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64MoviV,         new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi));
+            Add(Intrinsic.Arm64MrsFpsr,       new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister));
+            Add(Intrinsic.Arm64MsrFpsr,       new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister));
+            Add(Intrinsic.Arm64MulVe,         new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem));
+            Add(Intrinsic.Arm64MulV,          new IntrinsicInfo(0x0e209c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64MvniV,         new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorMvni));
+            Add(Intrinsic.Arm64NegS,          new IntrinsicInfo(0x7e20b800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64NegV,          new IntrinsicInfo(0x2e20b800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64NotV,          new IntrinsicInfo(0x2e205800u, IntrinsicType.VectorUnaryBitwise));
+            Add(Intrinsic.Arm64OrnV,          new IntrinsicInfo(0x0ee01c00u, IntrinsicType.VectorBinaryBitwise));
+            Add(Intrinsic.Arm64OrrVi,         new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorBinaryBitwiseImm));
+            Add(Intrinsic.Arm64OrrV,          new IntrinsicInfo(0x0ea01c00u, IntrinsicType.VectorBinaryBitwise));
+            Add(Intrinsic.Arm64PmullV,        new IntrinsicInfo(0x0e20e000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64PmulV,         new IntrinsicInfo(0x2e209c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64RaddhnV,       new IntrinsicInfo(0x2e204000u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64RbitV,         new IntrinsicInfo(0x2e605800u, IntrinsicType.VectorUnaryBitwise));
+            Add(Intrinsic.Arm64Rev16V,        new IntrinsicInfo(0x0e201800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64Rev32V,        new IntrinsicInfo(0x2e200800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64Rev64V,        new IntrinsicInfo(0x0e200800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64RshrnV,        new IntrinsicInfo(0x0f008c00u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64RsubhnV,       new IntrinsicInfo(0x2e206000u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64SabalV,        new IntrinsicInfo(0x0e205000u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64SabaV,         new IntrinsicInfo(0x0e207c00u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64SabdlV,        new IntrinsicInfo(0x0e207000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SabdV,         new IntrinsicInfo(0x0e207400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SadalpV,       new IntrinsicInfo(0x0e206800u, IntrinsicType.VectorBinaryRd));
+            Add(Intrinsic.Arm64SaddlpV,       new IntrinsicInfo(0x0e202800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64SaddlvV,       new IntrinsicInfo(0x0e303800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64SaddlV,        new IntrinsicInfo(0x0e200000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SaddwV,        new IntrinsicInfo(0x0e201000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64ScvtfSFixed,   new IntrinsicInfo(0x5f00e400u, IntrinsicType.ScalarFPConvFixed));
+            Add(Intrinsic.Arm64ScvtfVFixed,   new IntrinsicInfo(0x0f00e400u, IntrinsicType.VectorFPConvFixed));
+            Add(Intrinsic.Arm64ScvtfS,        new IntrinsicInfo(0x5e21d800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64ScvtfV,        new IntrinsicInfo(0x0e21d800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64ScvtfGpFixed,  new IntrinsicInfo(0x1e020000u, IntrinsicType.ScalarFPConvFixedGpr));
+            Add(Intrinsic.Arm64ScvtfGp,       new IntrinsicInfo(0x1e220000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64Sha1cV,        new IntrinsicInfo(0x5e000000u, IntrinsicType.Vector128Binary));
+            Add(Intrinsic.Arm64Sha1hV,        new IntrinsicInfo(0x5e280800u, IntrinsicType.Vector128Unary));
+            Add(Intrinsic.Arm64Sha1mV,        new IntrinsicInfo(0x5e002000u, IntrinsicType.Vector128Binary));
+            Add(Intrinsic.Arm64Sha1pV,        new IntrinsicInfo(0x5e001000u, IntrinsicType.Vector128Binary));
+            Add(Intrinsic.Arm64Sha1su0V,      new IntrinsicInfo(0x5e003000u, IntrinsicType.Vector128Binary));
+            Add(Intrinsic.Arm64Sha1su1V,      new IntrinsicInfo(0x5e281800u, IntrinsicType.Vector128Unary));
+            Add(Intrinsic.Arm64Sha256h2V,     new IntrinsicInfo(0x5e005000u, IntrinsicType.Vector128Binary));
+            Add(Intrinsic.Arm64Sha256hV,      new IntrinsicInfo(0x5e004000u, IntrinsicType.Vector128Binary));
+            Add(Intrinsic.Arm64Sha256su0V,    new IntrinsicInfo(0x5e282800u, IntrinsicType.Vector128Unary));
+            Add(Intrinsic.Arm64Sha256su1V,    new IntrinsicInfo(0x5e006000u, IntrinsicType.Vector128Binary));
+            Add(Intrinsic.Arm64ShaddV,        new IntrinsicInfo(0x0e200400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64ShllV,         new IntrinsicInfo(0x2e213800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64ShlS,          new IntrinsicInfo(0x5f005400u, IntrinsicType.ScalarBinaryShl));
+            Add(Intrinsic.Arm64ShlV,          new IntrinsicInfo(0x0f005400u, IntrinsicType.VectorBinaryShl));
+            Add(Intrinsic.Arm64ShrnV,         new IntrinsicInfo(0x0f008400u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64ShsubV,        new IntrinsicInfo(0x0e202400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SliS,          new IntrinsicInfo(0x7f005400u, IntrinsicType.ScalarTernaryShlRd));
+            Add(Intrinsic.Arm64SliV,          new IntrinsicInfo(0x2f005400u, IntrinsicType.VectorTernaryShlRd));
+            Add(Intrinsic.Arm64SmaxpV,        new IntrinsicInfo(0x0e20a400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SmaxvV,        new IntrinsicInfo(0x0e30a800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64SmaxV,         new IntrinsicInfo(0x0e206400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SminpV,        new IntrinsicInfo(0x0e20ac00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SminvV,        new IntrinsicInfo(0x0e31a800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64SminV,         new IntrinsicInfo(0x0e206c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SmlalVe,       new IntrinsicInfo(0x0f002000u, IntrinsicType.VectorTernaryRdByElem));
+            Add(Intrinsic.Arm64SmlalV,        new IntrinsicInfo(0x0e208000u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64SmlslVe,       new IntrinsicInfo(0x0f006000u, IntrinsicType.VectorTernaryRdByElem));
+            Add(Intrinsic.Arm64SmlslV,        new IntrinsicInfo(0x0e20a000u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64SmovV,         new IntrinsicInfo(0x0e002c00u, IntrinsicType.VectorUnaryByElem));
+            Add(Intrinsic.Arm64SmullVe,       new IntrinsicInfo(0x0f00a000u, IntrinsicType.VectorBinaryByElem));
+            Add(Intrinsic.Arm64SmullV,        new IntrinsicInfo(0x0e20c000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SqabsS,        new IntrinsicInfo(0x5e207800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64SqabsV,        new IntrinsicInfo(0x0e207800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64SqaddS,        new IntrinsicInfo(0x5e200c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SqaddV,        new IntrinsicInfo(0x0e200c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SqdmlalSe,     new IntrinsicInfo(0x5f003000u, IntrinsicType.ScalarBinaryByElem));
+            Add(Intrinsic.Arm64SqdmlalVe,     new IntrinsicInfo(0x0f003000u, IntrinsicType.VectorBinaryByElem));
+            Add(Intrinsic.Arm64SqdmlalS,      new IntrinsicInfo(0x5e209000u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SqdmlalV,      new IntrinsicInfo(0x0e209000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SqdmlslSe,     new IntrinsicInfo(0x5f007000u, IntrinsicType.ScalarBinaryByElem));
+            Add(Intrinsic.Arm64SqdmlslVe,     new IntrinsicInfo(0x0f007000u, IntrinsicType.VectorBinaryByElem));
+            Add(Intrinsic.Arm64SqdmlslS,      new IntrinsicInfo(0x5e20b000u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SqdmlslV,      new IntrinsicInfo(0x0e20b000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SqdmulhSe,     new IntrinsicInfo(0x5f00c000u, IntrinsicType.ScalarBinaryByElem));
+            Add(Intrinsic.Arm64SqdmulhVe,     new IntrinsicInfo(0x0f00c000u, IntrinsicType.VectorBinaryByElem));
+            Add(Intrinsic.Arm64SqdmulhS,      new IntrinsicInfo(0x5e20b400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SqdmulhV,      new IntrinsicInfo(0x0e20b400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SqdmullSe,     new IntrinsicInfo(0x5f00b000u, IntrinsicType.ScalarBinaryByElem));
+            Add(Intrinsic.Arm64SqdmullVe,     new IntrinsicInfo(0x0f00b000u, IntrinsicType.VectorBinaryByElem));
+            Add(Intrinsic.Arm64SqdmullS,      new IntrinsicInfo(0x5e20d000u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SqdmullV,      new IntrinsicInfo(0x0e20d000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SqnegS,        new IntrinsicInfo(0x7e207800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64SqnegV,        new IntrinsicInfo(0x2e207800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64SqrdmulhSe,    new IntrinsicInfo(0x5f00d000u, IntrinsicType.ScalarBinaryByElem));
+            Add(Intrinsic.Arm64SqrdmulhVe,    new IntrinsicInfo(0x0f00d000u, IntrinsicType.VectorBinaryByElem));
+            Add(Intrinsic.Arm64SqrdmulhS,     new IntrinsicInfo(0x7e20b400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SqrdmulhV,     new IntrinsicInfo(0x2e20b400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SqrshlS,       new IntrinsicInfo(0x5e205c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SqrshlV,       new IntrinsicInfo(0x0e205c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SqrshrnS,      new IntrinsicInfo(0x5f009c00u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64SqrshrnV,      new IntrinsicInfo(0x0f009c00u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64SqrshrunS,     new IntrinsicInfo(0x7f008c00u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64SqrshrunV,     new IntrinsicInfo(0x2f008c00u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64SqshluS,       new IntrinsicInfo(0x7f006400u, IntrinsicType.ScalarBinaryShl));
+            Add(Intrinsic.Arm64SqshluV,       new IntrinsicInfo(0x2f006400u, IntrinsicType.VectorBinaryShl));
+            Add(Intrinsic.Arm64SqshlSi,       new IntrinsicInfo(0x5f007400u, IntrinsicType.ScalarBinaryShl));
+            Add(Intrinsic.Arm64SqshlVi,       new IntrinsicInfo(0x0f007400u, IntrinsicType.VectorBinaryShl));
+            Add(Intrinsic.Arm64SqshlS,        new IntrinsicInfo(0x5e204c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SqshlV,        new IntrinsicInfo(0x0e204c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SqshrnS,       new IntrinsicInfo(0x5f009400u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64SqshrnV,       new IntrinsicInfo(0x0f009400u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64SqshrunS,      new IntrinsicInfo(0x7f008400u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64SqshrunV,      new IntrinsicInfo(0x2f008400u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64SqsubS,        new IntrinsicInfo(0x5e202c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SqsubV,        new IntrinsicInfo(0x0e202c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SqxtnS,        new IntrinsicInfo(0x5e214800u, IntrinsicType.ScalarBinaryRd));
+            Add(Intrinsic.Arm64SqxtnV,        new IntrinsicInfo(0x0e214800u, IntrinsicType.VectorBinaryRd));
+            Add(Intrinsic.Arm64SqxtunS,       new IntrinsicInfo(0x7e212800u, IntrinsicType.ScalarBinaryRd));
+            Add(Intrinsic.Arm64SqxtunV,       new IntrinsicInfo(0x2e212800u, IntrinsicType.VectorBinaryRd));
+            Add(Intrinsic.Arm64SrhaddV,       new IntrinsicInfo(0x0e201400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SriS,          new IntrinsicInfo(0x7f004400u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64SriV,          new IntrinsicInfo(0x2f004400u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64SrshlS,        new IntrinsicInfo(0x5e205400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SrshlV,        new IntrinsicInfo(0x0e205400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SrshrS,        new IntrinsicInfo(0x5f002400u, IntrinsicType.ScalarBinaryShr));
+            Add(Intrinsic.Arm64SrshrV,        new IntrinsicInfo(0x0f002400u, IntrinsicType.VectorBinaryShr));
+            Add(Intrinsic.Arm64SrsraS,        new IntrinsicInfo(0x5f003400u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64SrsraV,        new IntrinsicInfo(0x0f003400u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64SshllV,        new IntrinsicInfo(0x0f00a400u, IntrinsicType.VectorBinaryShl));
+            Add(Intrinsic.Arm64SshlS,         new IntrinsicInfo(0x5e204400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SshlV,         new IntrinsicInfo(0x0e204400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SshrS,         new IntrinsicInfo(0x5f000400u, IntrinsicType.ScalarBinaryShr));
+            Add(Intrinsic.Arm64SshrV,         new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorBinaryShr));
+            Add(Intrinsic.Arm64SsraS,         new IntrinsicInfo(0x5f001400u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64SsraV,         new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64SsublV,        new IntrinsicInfo(0x0e202000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SsubwV,        new IntrinsicInfo(0x0e203000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64St1Vms,        new IntrinsicInfo(0x0c002000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64St1Vss,        new IntrinsicInfo(0x0d000000u, IntrinsicType.VectorLdStSs));
+            Add(Intrinsic.Arm64St2Vms,        new IntrinsicInfo(0x0c008000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64St2Vss,        new IntrinsicInfo(0x0d200000u, IntrinsicType.VectorLdStSs));
+            Add(Intrinsic.Arm64St3Vms,        new IntrinsicInfo(0x0c004000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64St3Vss,        new IntrinsicInfo(0x0d002000u, IntrinsicType.VectorLdStSs));
+            Add(Intrinsic.Arm64St4Vms,        new IntrinsicInfo(0x0c000000u, IntrinsicType.VectorLdSt));
+            Add(Intrinsic.Arm64St4Vss,        new IntrinsicInfo(0x0d202000u, IntrinsicType.VectorLdStSs));
+            Add(Intrinsic.Arm64SubhnV,        new IntrinsicInfo(0x0e206000u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64SubS,          new IntrinsicInfo(0x7e208400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64SubV,          new IntrinsicInfo(0x2e208400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64SuqaddS,       new IntrinsicInfo(0x5e203800u, IntrinsicType.ScalarBinaryRd));
+            Add(Intrinsic.Arm64SuqaddV,       new IntrinsicInfo(0x0e203800u, IntrinsicType.VectorBinaryRd));
+            Add(Intrinsic.Arm64TblV,          new IntrinsicInfo(0x0e000000u, IntrinsicType.VectorLookupTable));
+            Add(Intrinsic.Arm64TbxV,          new IntrinsicInfo(0x0e001000u, IntrinsicType.VectorLookupTable));
+            Add(Intrinsic.Arm64Trn1V,         new IntrinsicInfo(0x0e002800u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64Trn2V,         new IntrinsicInfo(0x0e006800u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UabalV,        new IntrinsicInfo(0x2e205000u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64UabaV,         new IntrinsicInfo(0x2e207c00u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64UabdlV,        new IntrinsicInfo(0x2e207000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UabdV,         new IntrinsicInfo(0x2e207400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UadalpV,       new IntrinsicInfo(0x2e206800u, IntrinsicType.VectorBinaryRd));
+            Add(Intrinsic.Arm64UaddlpV,       new IntrinsicInfo(0x2e202800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64UaddlvV,       new IntrinsicInfo(0x2e303800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64UaddlV,        new IntrinsicInfo(0x2e200000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UaddwV,        new IntrinsicInfo(0x2e201000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UcvtfSFixed,   new IntrinsicInfo(0x7f00e400u, IntrinsicType.ScalarFPConvFixed));
+            Add(Intrinsic.Arm64UcvtfVFixed,   new IntrinsicInfo(0x2f00e400u, IntrinsicType.VectorFPConvFixed));
+            Add(Intrinsic.Arm64UcvtfS,        new IntrinsicInfo(0x7e21d800u, IntrinsicType.ScalarUnary));
+            Add(Intrinsic.Arm64UcvtfV,        new IntrinsicInfo(0x2e21d800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64UcvtfGpFixed,  new IntrinsicInfo(0x1e030000u, IntrinsicType.ScalarFPConvFixedGpr));
+            Add(Intrinsic.Arm64UcvtfGp,       new IntrinsicInfo(0x1e230000u, IntrinsicType.ScalarFPConvGpr));
+            Add(Intrinsic.Arm64UhaddV,        new IntrinsicInfo(0x2e200400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UhsubV,        new IntrinsicInfo(0x2e202400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UmaxpV,        new IntrinsicInfo(0x2e20a400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UmaxvV,        new IntrinsicInfo(0x2e30a800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64UmaxV,         new IntrinsicInfo(0x2e206400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UminpV,        new IntrinsicInfo(0x2e20ac00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UminvV,        new IntrinsicInfo(0x2e31a800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64UminV,         new IntrinsicInfo(0x2e206c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UmlalVe,       new IntrinsicInfo(0x2f002000u, IntrinsicType.VectorTernaryRdByElem));
+            Add(Intrinsic.Arm64UmlalV,        new IntrinsicInfo(0x2e208000u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64UmlslVe,       new IntrinsicInfo(0x2f006000u, IntrinsicType.VectorTernaryRdByElem));
+            Add(Intrinsic.Arm64UmlslV,        new IntrinsicInfo(0x2e20a000u, IntrinsicType.VectorTernaryRd));
+            Add(Intrinsic.Arm64UmovV,         new IntrinsicInfo(0x0e003c00u, IntrinsicType.VectorUnaryByElem));
+            Add(Intrinsic.Arm64UmullVe,       new IntrinsicInfo(0x2f00a000u, IntrinsicType.VectorBinaryByElem));
+            Add(Intrinsic.Arm64UmullV,        new IntrinsicInfo(0x2e20c000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UqaddS,        new IntrinsicInfo(0x7e200c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64UqaddV,        new IntrinsicInfo(0x2e200c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UqrshlS,       new IntrinsicInfo(0x7e205c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64UqrshlV,       new IntrinsicInfo(0x2e205c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UqrshrnS,      new IntrinsicInfo(0x7f009c00u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64UqrshrnV,      new IntrinsicInfo(0x2f009c00u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64UqshlSi,       new IntrinsicInfo(0x7f007400u, IntrinsicType.ScalarBinaryShl));
+            Add(Intrinsic.Arm64UqshlVi,       new IntrinsicInfo(0x2f007400u, IntrinsicType.VectorBinaryShl));
+            Add(Intrinsic.Arm64UqshlS,        new IntrinsicInfo(0x7e204c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64UqshlV,        new IntrinsicInfo(0x2e204c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UqshrnS,       new IntrinsicInfo(0x7f009400u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64UqshrnV,       new IntrinsicInfo(0x2f009400u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64UqsubS,        new IntrinsicInfo(0x7e202c00u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64UqsubV,        new IntrinsicInfo(0x2e202c00u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UqxtnS,        new IntrinsicInfo(0x7e214800u, IntrinsicType.ScalarBinaryRd));
+            Add(Intrinsic.Arm64UqxtnV,        new IntrinsicInfo(0x2e214800u, IntrinsicType.VectorBinaryRd));
+            Add(Intrinsic.Arm64UrecpeV,       new IntrinsicInfo(0x0ea1c800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64UrhaddV,       new IntrinsicInfo(0x2e201400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UrshlS,        new IntrinsicInfo(0x7e205400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64UrshlV,        new IntrinsicInfo(0x2e205400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UrshrS,        new IntrinsicInfo(0x7f002400u, IntrinsicType.ScalarBinaryShr));
+            Add(Intrinsic.Arm64UrshrV,        new IntrinsicInfo(0x2f002400u, IntrinsicType.VectorBinaryShr));
+            Add(Intrinsic.Arm64UrsqrteV,      new IntrinsicInfo(0x2ea1c800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64UrsraS,        new IntrinsicInfo(0x7f003400u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64UrsraV,        new IntrinsicInfo(0x2f003400u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64UshllV,        new IntrinsicInfo(0x2f00a400u, IntrinsicType.VectorBinaryShl));
+            Add(Intrinsic.Arm64UshlS,         new IntrinsicInfo(0x7e204400u, IntrinsicType.ScalarBinary));
+            Add(Intrinsic.Arm64UshlV,         new IntrinsicInfo(0x2e204400u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UshrS,         new IntrinsicInfo(0x7f000400u, IntrinsicType.ScalarBinaryShr));
+            Add(Intrinsic.Arm64UshrV,         new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorBinaryShr));
+            Add(Intrinsic.Arm64UsqaddS,       new IntrinsicInfo(0x7e203800u, IntrinsicType.ScalarBinaryRd));
+            Add(Intrinsic.Arm64UsqaddV,       new IntrinsicInfo(0x2e203800u, IntrinsicType.VectorBinaryRd));
+            Add(Intrinsic.Arm64UsraS,         new IntrinsicInfo(0x7f001400u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64UsraV,         new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64UsublV,        new IntrinsicInfo(0x2e202000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UsubwV,        new IntrinsicInfo(0x2e203000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64Uzp1V,         new IntrinsicInfo(0x0e001800u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64Uzp2V,         new IntrinsicInfo(0x0e005800u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64XtnV,          new IntrinsicInfo(0x0e212800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64Zip1V,         new IntrinsicInfo(0x0e003800u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64Zip2V,         new IntrinsicInfo(0x0e007800u, IntrinsicType.VectorBinary));
+        }
+
+        private static void Add(Intrinsic intrin, IntrinsicInfo info)
+        {
+            _intrinTable[(int)intrin] = info;
+        }
+
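+        // A sketch of the intended use (hypothetical caller): the code generator
+        // looks up the base opcode for an intrinsic here, then ORs the register
+        // and size fields into it at emit time, e.g.:
+        //   IntrinsicInfo info = GetInfo(Intrinsic.Arm64FmulV);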
+        public static IntrinsicInfo GetInfo(Intrinsic intrin)
+        {
+            return _intrinTable[(int)intrin];
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs
new file mode 100644
index 0000000000..800eca93c1
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs
@@ -0,0 +1,59 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+    enum IntrinsicType
+    {
+        ScalarUnary,
+        ScalarUnaryByElem,
+        ScalarBinary,
+        ScalarBinaryByElem,
+        ScalarBinaryFPByElem,
+        ScalarBinaryRd,
+        ScalarBinaryShl,
+        ScalarBinaryShr,
+        ScalarFcsel,
+        ScalarFmovi,
+        ScalarFPCompare,
+        ScalarFPCompareCond,
+        ScalarFPConv,
+        ScalarFPConvFixed,
+        ScalarFPConvFixedGpr,
+        ScalarFPConvGpr,
+        ScalarTernary,
+        ScalarTernaryFPRdByElem,
+        ScalarTernaryShlRd,
+        ScalarTernaryShrRd,
+
+        VectorUnary,
+        VectorUnaryBitwise,
+        VectorUnaryByElem,
+        VectorBinary,
+        VectorBinaryBitwise,
+        VectorBinaryBitwiseImm,
+        VectorBinaryByElem,
+        VectorBinaryFPByElem,
+        VectorBinaryRd,
+        VectorBinaryShl,
+        VectorBinaryShr,
+        VectorExt,
+        VectorFmovi,
+        VectorFPConvFixed,
+        VectorInsertByElem,
+        VectorLdSt,
+        VectorLdStSs,
+        VectorLookupTable,
+        VectorMovi,
+        VectorMvni,
+        VectorTernaryFPRdByElem,
+        VectorTernaryRd,
+        VectorTernaryRdBitwise,
+        VectorTernaryRdByElem,
+        VectorTernaryShlRd,
+        VectorTernaryShrRd,
+
+        Vector128Unary,
+        Vector128Binary,
+
+        GetRegister,
+        SetRegister
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/PreAllocator.cs b/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
new file mode 100644
index 0000000000..a7f0739468
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
@@ -0,0 +1,940 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    class PreAllocator
+    {
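+        // Maps a (value, type) pair to the local that already holds that constant
+        // within the current basic block, so repeated constants can reuse a single
+        // materializing copy instead of being loaded again.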
+        private class ConstantDict
+        {
+            private readonly Dictionary<(ulong, OperandType), Operand> _constants;
+
+            public ConstantDict()
+            {
+                _constants = new Dictionary<(ulong, OperandType), Operand>();
+            }
+
+            public void Add(ulong value, OperandType type, Operand local)
+            {
+                _constants.Add((value, type), local);
+            }
+
+            public bool TryGetValue(ulong value, OperandType type, out Operand local)
+            {
+                return _constants.TryGetValue((value, type), out local);
+            }
+        }
+
+        public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+        {
+            maxCallArgs = -1;
+
+            Span<Operation> buffer = default;
+
+            Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+            for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
+            {
+                ConstantDict constants = new ConstantDict();
+
+                Operation nextNode;
+
+                for (Operation node = block.Operations.First; node != default; node = nextNode)
+                {
+                    nextNode = node.ListNext;
+
+                    if (node.Instruction == Instruction.Phi)
+                    {
+                        continue;
+                    }
+
+                    HandleConstantRegCopy(constants, block.Operations, node);
+                    HandleDestructiveRegCopy(block.Operations, node);
+
+                    switch (node.Instruction)
+                    {
+                        case Instruction.Call:
+                            // Get the maximum number of arguments used on a call.
+                            // On Windows, when a struct is returned from the call,
+                            // we also need to pass the pointer where the struct
+                            // should be written to as the first argument.
+                            int argsCount = node.SourcesCount - 1;
+
+                            if (node.Destination != default && node.Destination.Type == OperandType.V128)
+                            {
+                                argsCount++;
+                            }
+
+                            if (maxCallArgs < argsCount)
+                            {
+                                maxCallArgs = argsCount;
+                            }
+
+                            // Copy values to registers expected by the function
+                            // being called, as mandated by the ABI.
+                            HandleCall(constants, block.Operations, node);
+                            break;
+                        case Instruction.CompareAndSwap:
+                        case Instruction.CompareAndSwap16:
+                        case Instruction.CompareAndSwap8:
+                            nextNode = HandleCompareAndSwap(block.Operations, node);
+                            break;
+                        case Instruction.LoadArgument:
+                            nextNode = HandleLoadArgument(cctx, ref buffer, block.Operations, preservedArgs, node);
+                            break;
+                        case Instruction.Return:
+                            HandleReturn(block.Operations, node);
+                            break;
+                        case Instruction.Tailcall:
+                            HandleTailcall(constants, block.Operations, stackAlloc, node, node);
+                            break;
+                    }
+                }
+            }
+        }
+
+        private static void HandleConstantRegCopy(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+        {
+            if (node.SourcesCount == 0 || IsIntrinsicWithConst(node))
+            {
+                return;
+            }
+
+            Instruction inst = node.Instruction;
+
+            Operand src1 = node.GetSource(0);
+            Operand src2;
+
+            if (src1.Kind == OperandKind.Constant)
+            {
+                if (!src1.Type.IsInteger())
+                {
+                    // Handle non-integer types (FP32, FP64 and V128).
+                    // For instructions without an immediate operand, we do the following:
+                    // - Insert a copy of the constant value (as an integer) into a GPR.
+                    // - Insert a copy from the GPR to an XMM register.
+                    // - Replace the constant use with the XMM register.
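+                    // For example (hypothetical registers), materializing the FP64
+                    // constant 2.0 would become:
+                    //   mov  x8, #0x4000000000000000  // integer bit pattern of 2.0
+                    //   fmov d0, x8                   // move the GPR into a SIMD register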
+                    src1 = AddFloatConstantCopy(constants, nodes, node, src1);
+
+                    node.SetSource(0, src1);
+                }
+                else if (!HasConstSrc1(node, src1.Value))
+                {
+                    // Handle integer types.
+                    // Most ALU instructions accept a 32-bit immediate on the second operand.
+                    // We need to ensure the following:
+                    // - If the constant is on operand 1, we need to move it.
+                    // -- But first, we try to swap operands 1 and 2 if the instruction is commutative.
+                    // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+                    // - If the constant is on operand 2, we check if the instruction supports it;
+                    // if not, we also add a copy. 64-bit constants are usually not supported.
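+                    // For example (hypothetical IR), "x = add #10, y" is swapped to
+                    // "x = add y, #10", letting the constant be encoded directly as
+                    // an immediate instead of requiring a separate constant copy.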
+                    if (IsCommutative(node))
+                    {
+                        src2 = node.GetSource(1);
+
+                        Operand temp = src1;
+
+                        src1 = src2;
+                        src2 = temp;
+
+                        node.SetSource(0, src1);
+                        node.SetSource(1, src2);
+                    }
+
+                    if (src1.Kind == OperandKind.Constant)
+                    {
+                        src1 = AddIntConstantCopy(constants, nodes, node, src1);
+
+                        node.SetSource(0, src1);
+                    }
+                }
+            }
+
+            if (node.SourcesCount < 2)
+            {
+                return;
+            }
+
+            src2 = node.GetSource(1);
+
+            if (src2.Kind == OperandKind.Constant)
+            {
+                if (!src2.Type.IsInteger())
+                {
+                    src2 = AddFloatConstantCopy(constants, nodes, node, src2);
+
+                    node.SetSource(1, src2);
+                }
+                else if (!HasConstSrc2(inst, src2))
+                {
+                    src2 = AddIntConstantCopy(constants, nodes, node, src2);
+
+                    node.SetSource(1, src2);
+                }
+            }
+
+            if (node.SourcesCount < 3 ||
+                node.Instruction == Instruction.BranchIf ||
+                node.Instruction == Instruction.Compare ||
+                node.Instruction == Instruction.VectorInsert ||
+                node.Instruction == Instruction.VectorInsert16 ||
+                node.Instruction == Instruction.VectorInsert8)
+            {
+                return;
+            }
+
+            for (int srcIndex = 2; srcIndex < node.SourcesCount; srcIndex++)
+            {
+                Operand src = node.GetSource(srcIndex);
+
+                if (src.Kind == OperandKind.Constant)
+                {
+                    if (!src.Type.IsInteger())
+                    {
+                        src = AddFloatConstantCopy(constants, nodes, node, src);
+
+                        node.SetSource(srcIndex, src);
+                    }
+                    else
+                    {
+                        src = AddIntConstantCopy(constants, nodes, node, src);
+
+                        node.SetSource(srcIndex, src);
+                    }
+                }
+            }
+        }
+
+        private static void HandleDestructiveRegCopy(IntrusiveList<Operation> nodes, Operation node)
+        {
+            if (node.Destination == default || node.SourcesCount == 0)
+            {
+                return;
+            }
+
+            Operand dest = node.Destination;
+            Operand src1 = node.GetSource(0);
+
+            if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable)
+            {
+                bool useNewLocal = false;
+
+                for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++)
+                {
+                    if (node.GetSource(srcIndex) == dest)
+                    {
+                        useNewLocal = true;
+
+                        break;
+                    }
+                }
+
+                if (useNewLocal)
+                {
+                    // Dest is already being used as a source, so we need a new local
+                    // to store the temporary value; otherwise the value in the dest
+                    // local would be overwritten.
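+                    // For example (hypothetical IR), with "d = sub s1, d" we cannot
+                    // copy s1 into d first without clobbering the second source, so
+                    // we compute into a temp and only then copy the temp into d.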
+                    Operand temp = Local(dest.Type);
+
+                    nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));
+
+                    node.SetSource(0, temp);
+
+                    nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
+
+                    node.Destination = temp;
+                }
+                else
+                {
+                    nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1));
+
+                    node.SetSource(0, dest);
+                }
+            }
+        }
+
+        private static void HandleCall(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+        {
+            Operation operation = node;
+
+            Operand dest = operation.Destination;
+
+            List<Operand> sources = new List<Operand>
+            {
+                operation.GetSource(0)
+            };
+
+            int argsCount = operation.SourcesCount - 1;
+
+            int intMax = CallingConvention.GetArgumentsOnRegsCount();
+            int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+
+            int intCount = 0;
+            int vecCount = 0;
+
+            int stackOffset = 0;
+
+            for (int index = 0; index < argsCount; index++)
+            {
+                Operand source = operation.GetSource(index + 1);
+
+                bool passOnReg;
+
+                if (source.Type.IsInteger())
+                {
+                    passOnReg = intCount < intMax;
+                }
+                else if (source.Type == OperandType.V128)
+                {
+                    passOnReg = intCount + 1 < intMax;
+                }
+                else
+                {
+                    passOnReg = vecCount < vecMax;
+                }
+
+                if (source.Type == OperandType.V128 && passOnReg)
+                {
+                    // V128 is a struct; we pass each half in a GPR if possible.
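+                    // E.g. assuming GetIntArgumentRegister maps 0 and 1 to x0 and x1,
+                    // the first V128 argument is split into x0 (low half) and x1 (high half).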
+                    Operand argReg  = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+                    Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg,  source, Const(0)));
+                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+                    continue;
+                }
+
+                if (passOnReg)
+                {
+                    Operand argReg = source.Type.IsInteger()
+                        ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+                        : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+                    Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+                    HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp));
+
+                    sources.Add(argReg);
+                }
+                else
+                {
+                    Operand offset = Const(stackOffset);
+
+                    Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+
+                    HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, spillOp));
+
+                    stackOffset += source.Type.GetSizeInBytes();
+                }
+            }
+
+            if (dest != default)
+            {
+                if (dest.Type == OperandType.V128)
+                {
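+                    // The V128 return value comes back split across two GPRs;
+                    // reassemble it into the destination vector after the call.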
+                    Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(),     OperandType.I64);
+                    Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+                    node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
+                    nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+
+                    operation.Destination = default;
+                }
+                else
+                {
+                    Operand retReg = dest.Type.IsInteger()
+                        ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+                        : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+                    Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+
+                    nodes.AddAfter(node, copyOp);
+
+                    operation.Destination = retReg;
+                }
+            }
+
+            operation.SetSources(sources.ToArray());
+        }
+
+        private static void HandleTailcall(
+            ConstantDict constants,
+            IntrusiveList<Operation> nodes,
+            StackAllocator stackAlloc,
+            Operation node,
+            Operation operation)
+        {
+            List<Operand> sources = new List<Operand>
+            {
+                operation.GetSource(0)
+            };
+
+            int argsCount = operation.SourcesCount - 1;
+
+            int intMax = CallingConvention.GetArgumentsOnRegsCount();
+            int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+
+            int intCount = 0;
+            int vecCount = 0;
+
+            // Handle arguments passed on registers.
+            for (int index = 0; index < argsCount; index++)
+            {
+                Operand source = operation.GetSource(1 + index);
+
+                bool passOnReg;
+
+                if (source.Type.IsInteger())
+                {
+                    passOnReg = intCount + 1 < intMax;
+                }
+                else
+                {
+                    passOnReg = vecCount < vecMax;
+                }
+
+                if (source.Type == OperandType.V128 && passOnReg)
+                {
+                    // V128 is a struct; we pass each half in a GPR if possible.
+                    Operand argReg  = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+                    Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+                    continue;
+                }
+
+                if (passOnReg)
+                {
+                    Operand argReg = source.Type.IsInteger()
+                        ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+                        : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+                    Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+                    HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp));
+
+                    sources.Add(argReg);
+                }
+                else
+                {
+                    throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+                }
+            }
+
+            // The target address must go in the return register, since we
+            // don't return anything and it is guaranteed not to be a
+            // callee-saved register (which would be trashed by the epilogue).
+            Operand tcAddress = Gpr(CodeGenCommon.TcAddressRegister, OperandType.I64);
+
+            Operation addrCopyOp = Operation(Instruction.Copy, tcAddress, operation.GetSource(0));
+
+            nodes.AddBefore(node, addrCopyOp);
+
+            sources[0] = tcAddress;
+
+            operation.SetSources(sources.ToArray());
+        }
+
+        private static Operation HandleCompareAndSwap(IntrusiveList<Operation> nodes, Operation node)
+        {
+            Operand expected = node.GetSource(1);
+
+            if (expected.Type == OperandType.V128)
+            {
+                Operand dest = node.Destination;
+                Operand expectedLow = Local(OperandType.I64);
+                Operand expectedHigh = Local(OperandType.I64);
+                Operand desiredLow = Local(OperandType.I64);
+                Operand desiredHigh = Local(OperandType.I64);
+                Operand actualLow = Local(OperandType.I64);
+                Operand actualHigh = Local(OperandType.I64);
+
+                Operand address = node.GetSource(0);
+                Operand desired = node.GetSource(2);
+
+                void SplitOperand(Operand source, Operand low, Operand high)
+                {
+                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, low, source, Const(0)));
+                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, high, source, Const(1)));
+                }
+
+                SplitOperand(expected, expectedLow, expectedHigh);
+                SplitOperand(desired, desiredLow, desiredHigh);
+
+                Operation operation = node;
+
+                // Update the sources and destinations with the split 64-bit halves of the whole 128-bit values.
+                // We also need additional registers that will be used to store temporary information.
+                operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) });
+                operation.SetSources(new[] { address, expectedLow, expectedHigh, desiredLow, desiredHigh });
+
+                // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+                // so they can't be used as destination operands.
+                for (int i = 0; i < operation.SourcesCount; i++)
+                {
+                    Operand src = operation.GetSource(i);
+                    node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+                }
+
+                // Assemble the result vector from the two 64-bit values read from the memory location.
+                node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, actualLow));
+                node = nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, actualHigh, Const(1)));
+            }
+            else
+            {
+                // We need an additional register where the store result will be written.
+                node.SetDestinations(new[] { node.Destination, Local(OperandType.I32) });
+
+                // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+                // so they can't be used as destination operands.
+                Operation operation = node;
+
+                for (int i = 0; i < operation.SourcesCount; i++)
+                {
+                    Operand src = operation.GetSource(i);
+                    node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+                }
+            }
+
+            return node.ListNext;
+        }
+
+        private static void HandleReturn(IntrusiveList<Operation> nodes, Operation node)
+        {
+            if (node.SourcesCount == 0)
+            {
+                return;
+            }
+
+            Operand source = node.GetSource(0);
+
+            if (source.Type == OperandType.V128)
+            {
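+                // V128 is returned as two 64-bit halves, one in each integer return register.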
+                Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(),     OperandType.I64);
+                Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+                nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
+                nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
+            }
+            else
+            {
+                Operand retReg = source.Type.IsInteger()
+                    ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type)
+                    : Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
+
+                Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
+
+                nodes.AddBefore(node, retCopyOp);
+            }
+        }
+
+        private static Operation HandleLoadArgument(
+            CompilerContext cctx,
+            ref Span<Operation> buffer,
+            IntrusiveList<Operation> nodes,
+            Operand[] preservedArgs,
+            Operation node)
+        {
+            Operand source = node.GetSource(0);
+
+            Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+            int index = source.AsInt32();
+
+            int intCount = 0;
+            int vecCount = 0;
+
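+            // Count the argument registers consumed by the arguments preceding this
+            // one; V128 arguments take two GPRs, one per 64-bit half.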
+            for (int cIndex = 0; cIndex < index; cIndex++)
+            {
+                OperandType argType = cctx.FuncArgTypes[cIndex];
+
+                if (argType.IsInteger())
+                {
+                    intCount++;
+                }
+                else if (argType == OperandType.V128)
+                {
+                    intCount += 2;
+                }
+                else
+                {
+                    vecCount++;
+                }
+            }
+
+            bool passOnReg;
+
+            if (source.Type.IsInteger())
+            {
+                passOnReg = intCount < CallingConvention.GetArgumentsOnRegsCount();
+            }
+            else if (source.Type == OperandType.V128)
+            {
+                passOnReg = intCount + 1 < CallingConvention.GetArgumentsOnRegsCount();
+            }
+            else
+            {
+                passOnReg = vecCount < CallingConvention.GetArgumentsOnRegsCount();
+            }
+
+            if (passOnReg)
+            {
+                Operand dest = node.Destination;
+
+                if (preservedArgs[index] == default)
+                {
+                    if (dest.Type == OperandType.V128)
+                    {
+                        // V128 is a struct; we pass each half in a GPR if possible.
+                        Operand pArg = Local(OperandType.V128);
+
+                        Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
+                        Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
+
+                        Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg);
+                        Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
+
+                        cctx.Cfg.Entry.Operations.AddFirst(copyH);
+                        cctx.Cfg.Entry.Operations.AddFirst(copyL);
+
+                        preservedArgs[index] = pArg;
+                    }
+                    else
+                    {
+                        Operand pArg = Local(dest.Type);
+
+                        Operand argReg = dest.Type.IsInteger()
+                            ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
+                            : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);
+
+                        Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
+
+                        cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+                        preservedArgs[index] = pArg;
+                    }
+                }
+
+                Operation nextNode;
+
+                if (dest.AssignmentsCount == 1)
+                {
+                    // Let's propagate the argument if we can to avoid copies.
+                    Propagate(ref buffer, dest, preservedArgs[index]);
+                    nextNode = node.ListNext;
+                }
+                else
+                {
+                    Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]);
+                    nextNode = nodes.AddBefore(node, argCopyOp);
+                }
+
+                Delete(nodes, node);
+                return nextNode;
+            }
+            else
+            {
+                // TODO: Pass on stack.
+                return node;
+            }
+        }
+
+        private static void Propagate(ref Span<Operation> buffer, Operand dest, Operand value)
+        {
+            ReadOnlySpan<Operation> uses = dest.GetUses(ref buffer);
+
+            foreach (Operation use in uses)
+            {
+                for (int srcIndex = 0; srcIndex < use.SourcesCount; srcIndex++)
+                {
+                    Operand useSrc = use.GetSource(srcIndex);
+
+                    if (useSrc == dest)
+                    {
+                        use.SetSource(srcIndex, value);
+                    }
+                    else if (useSrc.Kind == OperandKind.Memory)
+                    {
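+                        // The value may also be used as the base address or index of a
+                        // memory operand, so rebuild the memory operand if either matches.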
+                        MemoryOperand memoryOp = useSrc.GetMemory();
+
+                        Operand baseAddr = memoryOp.BaseAddress;
+                        Operand index = memoryOp.Index;
+                        bool changed = false;
+
+                        if (baseAddr == dest)
+                        {
+                            baseAddr = value;
+                            changed = true;
+                        }
+
+                        if (index == dest)
+                        {
+                            index = value;
+                            changed = true;
+                        }
+
+                        if (changed)
+                        {
+                            use.SetSource(srcIndex, MemoryOp(
+                                useSrc.Type,
+                                baseAddr,
+                                index,
+                                memoryOp.Scale,
+                                memoryOp.Displacement));
+                        }
+                    }
+                }
+            }
+        }
+
+        private static Operand AddFloatConstantCopy(
+            ConstantDict constants,
+            IntrusiveList<Operation> nodes,
+            Operation node,
+            Operand source)
+        {
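+            // Materialize the FP constant by copying its raw bits into an integer
+            // local and then moving them into a vector with VectorCreateScalar.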
+            Operand temp = Local(source.Type);
+
+            Operand intConst = AddIntConstantCopy(constants, nodes, node, GetIntConst(source));
+
+            Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst);
+
+            nodes.AddBefore(node, copyOp);
+
+            return temp;
+        }
+
+        private static Operand AddIntConstantCopy(
+            ConstantDict constants,
+            IntrusiveList<Operation> nodes,
+            Operation node,
+            Operand source)
+        {
+            if (constants.TryGetValue(source.Value, source.Type, out Operand temp))
+            {
+                return temp;
+            }
+
+            temp = Local(source.Type);
+
+            Operation copyOp = Operation(Instruction.Copy, temp, source);
+
+            nodes.AddBefore(node, copyOp);
+
+            constants.Add(source.Value, source.Type, temp);
+
+            return temp;
+        }
+
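+        // Reinterprets the raw bits of a floating-point constant as an integer
+        // constant of the same width, leaving integer constants untouched.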
+        private static Operand GetIntConst(Operand value)
+        {
+            if (value.Type == OperandType.FP32)
+            {
+                return Const(value.AsInt32());
+            }
+            else if (value.Type == OperandType.FP64)
+            {
+                return Const(value.AsInt64());
+            }
+
+            return value;
+        }
+
+        private static void Delete(IntrusiveList<Operation> nodes, Operation node)
+        {
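+            // Unset the destination and sources first, so the operand use and
+            // assignment counts are updated before the node is removed from the list.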
+            node.Destination = default;
+
+            for (int index = 0; index < node.SourcesCount; index++)
+            {
+                node.SetSource(index, default);
+            }
+
+            nodes.Remove(node);
+        }
+
+        private static Operand Gpr(int register, OperandType type)
+        {
+            return Register(register, RegisterType.Integer, type);
+        }
+
+        private static Operand Xmm(int register, OperandType type)
+        {
+            return Register(register, RegisterType.Vector, type);
+        }
+
+        private static bool IsSameOperandDestSrc1(Operation operation)
+        {
+            switch (operation.Instruction)
+            {
+                case Instruction.Extended:
+                    return IsSameOperandDestSrc1(operation.Intrinsic);
+                case Instruction.VectorInsert:
+                case Instruction.VectorInsert16:
+                case Instruction.VectorInsert8:
+                    return true;
+            }
+
+            return false;
+        }
+
+        private static bool IsSameOperandDestSrc1(Intrinsic intrinsic)
+        {
+            IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
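+            // These intrinsic types map to destructive ARM64 instructions, where
+            // the destination register (Rd) is also read as an input.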
+            return info.Type == IntrinsicType.ScalarBinaryRd ||
+                   info.Type == IntrinsicType.ScalarTernaryFPRdByElem ||
+                   info.Type == IntrinsicType.ScalarTernaryShlRd ||
+                   info.Type == IntrinsicType.ScalarTernaryShrRd ||
+                   info.Type == IntrinsicType.VectorBinaryRd ||
+                   info.Type == IntrinsicType.VectorInsertByElem ||
+                   info.Type == IntrinsicType.VectorTernaryRd ||
+                   info.Type == IntrinsicType.VectorTernaryRdBitwise ||
+                   info.Type == IntrinsicType.VectorTernaryFPRdByElem ||
+                   info.Type == IntrinsicType.VectorTernaryRdByElem ||
+                   info.Type == IntrinsicType.VectorTernaryShlRd ||
+                   info.Type == IntrinsicType.VectorTernaryShrRd;
+        }
+
+        private static bool HasConstSrc1(Operation node, ulong value)
+        {
+            switch (node.Instruction)
+            {
+                case Instruction.Add:
+                case Instruction.BranchIf:
+                case Instruction.Compare:
+                case Instruction.Subtract:
+                    // The immediate encoding of those instructions does not allow Rn to be
+                    // XZR (it will be SP instead), so we can't allow an Rn constant in this case.
+                    return value == 0 && NotConstOrConst0(node.GetSource(1));
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.BitwiseNot:
+                case Instruction.BitwiseOr:
+                case Instruction.ByteSwap:
+                case Instruction.CountLeadingZeros:
+                case Instruction.Multiply:
+                case Instruction.Negate:
+                case Instruction.RotateRight:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                    return value == 0;
+                case Instruction.Copy:
+                case Instruction.LoadArgument:
+                case Instruction.Spill:
+                case Instruction.SpillArg:
+                    return true;
+                case Instruction.Extended:
+                    return value == 0;
+            }
+
+            return false;
+        }
+
+        private static bool NotConstOrConst0(Operand operand)
+        {
+            return operand.Kind != OperandKind.Constant || operand.Value == 0;
+        }
+
+        private static bool HasConstSrc2(Instruction inst, Operand operand)
+        {
+            ulong value = operand.Value;
+
+            switch (inst)
+            {
+                case Instruction.Add:
+                case Instruction.BranchIf:
+                case Instruction.Compare:
+                case Instruction.Subtract:
+                    return ConstFitsOnUImm12Sh(value);
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.BitwiseOr:
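+                    // Logical instructions can take constants encodable with the ARM64
+                    // bitmask immediate format (rotated, repeating runs of set bits).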
+                    return value == 0 || CodeGenCommon.TryEncodeBitMask(operand, out _, out _, out _);
+                case Instruction.Multiply:
+                case Instruction.Store:
+                case Instruction.Store16:
+                case Instruction.Store8:
+                    return value == 0;
+                case Instruction.RotateRight:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                case Instruction.VectorExtract:
+                case Instruction.VectorExtract16:
+                case Instruction.VectorExtract8:
+                    return true;
+                case Instruction.Extended:
+                    // TODO: Check if the actual intrinsic is supposed to have consts here.
+                    // Right now we only hit this case for fixed-point int <-> FP conversion instructions.
+                    return true;
+            }
+
+            return false;
+        }
+
+        private static bool IsCommutative(Operation operation)
+        {
+            switch (operation.Instruction)
+            {
+                case Instruction.Add:
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.BitwiseOr:
+                case Instruction.Multiply:
+                    return true;
+
+                case Instruction.BranchIf:
+                case Instruction.Compare:
+                    {
+                        Operand comp = operation.GetSource(2);
+
+                        Debug.Assert(comp.Kind == OperandKind.Constant);
+
+                        var compType = (Comparison)comp.AsInt32();
+
+                        return compType == Comparison.Equal || compType == Comparison.NotEqual;
+                    }
+            }
+
+            return false;
+        }
+
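+        // ARM64 ADD/SUB (immediate) takes a 12-bit unsigned immediate, optionally
+        // left-shifted by 12, hence the two masks checked here.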
+        private static bool ConstFitsOnUImm12Sh(ulong value)
+        {
+            return (value & ~0xfffUL) == 0 || (value & ~0xfff000UL) == 0;
+        }
+
+        private static bool IsIntrinsicWithConst(Operation operation)
+        {
+            bool isIntrinsic = IsIntrinsic(operation.Instruction);
+
+            if (isIntrinsic)
+            {
+                Intrinsic intrinsic = operation.Intrinsic;
+                IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
+                // Those have integer inputs that don't support consts.
+                return info.Type != IntrinsicType.ScalarFPConvGpr &&
+                       info.Type != IntrinsicType.ScalarFPConvFixedGpr &&
+                       info.Type != IntrinsicType.SetRegister;
+            }
+
+            return false;
+        }
+
+        private static bool IsIntrinsic(Instruction inst)
+        {
+            return inst == Instruction.Extended;
+        }
+    }
+}
diff --git a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
index 0423c25592..c5a22a5376 100644
--- a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
+++ b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
@@ -90,6 +90,47 @@ namespace ARMeilleure.CodeGen.Optimizations
                     }
                     break;
 
+                case Instruction.Compare:
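+                    // Source 2 selects the comparison kind; the two constant operands
+                    // are folded into an I32 result of 1 or 0.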
+                    if (type == OperandType.I32 &&
+                        operation.GetSource(0).Type == type &&
+                        operation.GetSource(1).Type == type)
+                    {
+                        switch ((Comparison)operation.GetSource(2).Value)
+                        {
+                            case Comparison.Equal:
+                                EvaluateBinaryI32(operation, (x, y) => x == y ? 1 : 0);
+                                break;
+                            case Comparison.NotEqual:
+                                EvaluateBinaryI32(operation, (x, y) => x != y ? 1 : 0);
+                                break;
+                            case Comparison.Greater:
+                                EvaluateBinaryI32(operation, (x, y) => x > y ? 1 : 0);
+                                break;
+                            case Comparison.LessOrEqual:
+                                EvaluateBinaryI32(operation, (x, y) => x <= y ? 1 : 0);
+                                break;
+                            case Comparison.GreaterUI:
+                                EvaluateBinaryI32(operation, (x, y) => (uint)x > (uint)y ? 1 : 0);
+                                break;
+                            case Comparison.LessOrEqualUI:
+                                EvaluateBinaryI32(operation, (x, y) => (uint)x <= (uint)y ? 1 : 0);
+                                break;
+                            case Comparison.GreaterOrEqual:
+                                EvaluateBinaryI32(operation, (x, y) => x >= y ? 1 : 0);
+                                break;
+                            case Comparison.Less:
+                                EvaluateBinaryI32(operation, (x, y) => x < y ? 1 : 0);
+                                break;
+                            case Comparison.GreaterOrEqualUI:
+                                EvaluateBinaryI32(operation, (x, y) => (uint)x >= (uint)y ? 1 : 0);
+                                break;
+                            case Comparison.LessUI:
+                                EvaluateBinaryI32(operation, (x, y) => (uint)x < (uint)y ? 1 : 0);
+                                break;
+                        }
+                    }
+                    break;
+
                 case Instruction.Copy:
                     if (type == OperandType.I32)
                     {
diff --git a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
index 919e996bdd..a45bb4551f 100644
--- a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
+++ b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
@@ -44,8 +44,8 @@ namespace ARMeilleure.CodeGen.Optimizations
                         ConstantFolding.RunPass(node);
                         Simplification.RunPass(node);
 
-                        if (DestIsLocalVar(node))
-                        {   
+                        if (DestIsSingleLocalVar(node))
+                        {
                             if (IsPropagableCompare(node))
                             {
                                 modified |= PropagateCompare(ref buffer, node);
@@ -99,20 +99,6 @@ namespace ARMeilleure.CodeGen.Optimizations
             while (modified);
         }
 
-        private static Span<Operation> GetUses(ref Span<Operation> buffer, Operand operand)
-        {
-            ReadOnlySpan<Operation> uses = operand.Uses;
-
-            if (buffer.Length < uses.Length)
-            {
-                buffer = Allocators.Default.AllocateSpan<Operation>((uint)uses.Length);
-            }
-
-            uses.CopyTo(buffer);
-
-            return buffer.Slice(0, uses.Length);
-        }
-
         private static bool PropagateCompare(ref Span<Operation> buffer, Operation compOp)
         {
             // Try to propagate Compare operations into their BranchIf uses, when these BranchIf uses are in the form
@@ -160,7 +146,7 @@ namespace ARMeilleure.CodeGen.Optimizations
 
             Comparison compType = (Comparison)comp.AsInt32();
 
-            Span<Operation> uses = GetUses(ref buffer, dest);
+            Span<Operation> uses = dest.GetUses(ref buffer);
 
             foreach (Operation use in uses)
             {
@@ -199,7 +185,7 @@ namespace ARMeilleure.CodeGen.Optimizations
             Operand dest   = copyOp.Destination;
             Operand source = copyOp.GetSource(0);
 
-            Span<Operation> uses = GetUses(ref buffer, dest);
+            Span<Operation> uses = dest.GetUses(ref buffer);
 
             foreach (Operation use in uses)
             {
@@ -231,12 +217,12 @@ namespace ARMeilleure.CodeGen.Optimizations
 
         private static bool IsUnused(Operation node)
         {
-            return DestIsLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node);
+            return DestIsSingleLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node);
         }
 
-        private static bool DestIsLocalVar(Operation node)
+        private static bool DestIsSingleLocalVar(Operation node)
         {
-            return node.Destination != default && node.Destination.Kind == OperandKind.LocalVariable;
+            return node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable;
         }
 
         private static bool HasSideEffects(Operation node)
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
index d8a40365b0..6ea62c28b9 100644
--- a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
@@ -17,8 +17,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
         private const int InstructionGap     = 2;
         private const int InstructionGapMask = InstructionGap - 1;
 
-        private const int RegistersCount = 16;
-
         private HashSet<int> _blockEdges;
         private LiveRange[] _blockRanges;
         private BitMap[] _blockLiveIn;
@@ -59,7 +57,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
 
                 void PopulateFreePositions(RegisterType type, out int[] positions, out int count)
                 {
-                    positions = new int[RegistersCount];
+                    positions = new int[masks.RegistersCount];
                     count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type));
 
                     int mask = masks.GetAvailableRegisters(type);
@@ -115,7 +113,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             StackAllocator stackAlloc,
             RegisterMasks regMasks)
         {
-            NumberLocals(cfg);
+            NumberLocals(cfg, regMasks.RegistersCount);
 
             var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);
 
@@ -134,22 +132,25 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 {
                     context.Active.Set(index);
 
-                    if (current.Register.Type == RegisterType.Integer)
+                    if (current.IsFixedAndUsed)
                     {
-                        context.IntUsedRegisters |= 1 << current.Register.Index;
-                    }
-                    else /* if (interval.Register.Type == RegisterType.Vector) */
-                    {
-                        context.VecUsedRegisters |= 1 << current.Register.Index;
+                        if (current.Register.Type == RegisterType.Integer)
+                        {
+                            context.IntUsedRegisters |= 1 << current.Register.Index;
+                        }
+                        else /* if (interval.Register.Type == RegisterType.Vector) */
+                        {
+                            context.VecUsedRegisters |= 1 << current.Register.Index;
+                        }
                     }
 
                     continue;
                 }
 
-                AllocateInterval(context, current, index);
+                AllocateInterval(context, current, index, regMasks.RegistersCount);
             }
 
-            for (int index = RegistersCount * 2; index < _intervals.Count; index++)
+            for (int index = regMasks.RegistersCount * 2; index < _intervals.Count; index++)
             {
                 if (!_intervals[index].IsSpilled)
                 {
@@ -163,7 +164,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize);
         }
 
-        private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex)
+        private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
         {
             // Check active intervals that already ended.
             foreach (int iIndex in context.Active)
@@ -199,17 +200,17 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 }
             }
 
-            if (!TryAllocateRegWithoutSpill(context, current, cIndex))
+            if (!TryAllocateRegWithoutSpill(context, current, cIndex, registersCount))
             {
-                AllocateRegWithSpill(context, current, cIndex);
+                AllocateRegWithSpill(context, current, cIndex, registersCount);
             }
         }
 
-        private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex)
+        private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
         {
             RegisterType regType = current.Local.Type.ToRegisterType();
 
-            Span<int> freePositions = stackalloc int[RegistersCount];
+            Span<int> freePositions = stackalloc int[registersCount];
 
             context.GetFreePositions(regType, freePositions, out int freePositionsCount);
 
@@ -278,7 +279,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 {
                     Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
 
-                    InsertInterval(splitChild);
+                    InsertInterval(splitChild, registersCount);
                 }
                 else
                 {
@@ -302,12 +303,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             return true;
         }
 
-        private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex)
+        private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
         {
             RegisterType regType = current.Local.Type.ToRegisterType();
 
-            Span<int> usePositions = stackalloc int[RegistersCount];
-            Span<int> blockedPositions = stackalloc int[RegistersCount];
+            Span<int> usePositions = stackalloc int[registersCount];
+            Span<int> blockedPositions = stackalloc int[registersCount];
 
             context.GetFreePositions(regType, usePositions, out _);
             context.GetFreePositions(regType, blockedPositions, out _);
@@ -386,7 +387,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
 
                 Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
 
-                InsertInterval(splitChild);
+                InsertInterval(splitChild, registersCount);
 
                 Spill(context, current);
             }
@@ -396,7 +397,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 // so we only need to split the intervals using the selected register.
                 current.Register = new Register(selectedReg, regType);
 
-                SplitAndSpillOverlappingIntervals(context, current);
+                SplitAndSpillOverlappingIntervals(context, current, registersCount);
 
                 context.Active.Set(cIndex);
             }
@@ -417,14 +418,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 {
                     Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
 
-                    InsertInterval(splitChild);
+                    InsertInterval(splitChild, registersCount);
                 }
                 else
                 {
                     Spill(context, splitChild);
                 }
 
-                SplitAndSpillOverlappingIntervals(context, current);
+                SplitAndSpillOverlappingIntervals(context, current, registersCount);
 
                 context.Active.Set(cIndex);
             }
@@ -460,7 +461,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             return selected;
         }
 
-        private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current)
+        private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current, int registersCount)
         {
             foreach (int iIndex in context.Active)
             {
@@ -468,7 +469,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
 
                 if (!interval.IsFixed && interval.Register == current.Register)
                 {
-                    SplitAndSpillOverlappingInterval(context, current, interval);
+                    SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
 
                     context.Active.Clear(iIndex);
                 }
@@ -480,7 +481,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
 
                 if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current))
                 {
-                    SplitAndSpillOverlappingInterval(context, current, interval);
+                    SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
 
                     context.Inactive.Clear(iIndex);
                 }
@@ -490,7 +491,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
         private void SplitAndSpillOverlappingInterval(
             AllocationContext context,
             LiveInterval      current,
-            LiveInterval      interval)
+            LiveInterval      interval,
+            int               registersCount)
         {
             // If there's a next use after the start of the current interval,
             // we need to split the spilled interval twice, and re-insert it
@@ -522,7 +524,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                     splitChild = right;
                 }
 
-                InsertInterval(splitChild);
+                InsertInterval(splitChild, registersCount);
             }
             else
             {
@@ -530,13 +532,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             }
         }
 
-        private void InsertInterval(LiveInterval interval)
+        private void InsertInterval(LiveInterval interval, int registersCount)
         {
             Debug.Assert(interval.UsesCount != 0, "Trying to insert an interval without uses.");
             Debug.Assert(!interval.IsEmpty,       "Trying to insert an empty interval.");
             Debug.Assert(!interval.IsSpilled,     "Trying to insert a spilled interval.");
 
-            int startIndex = RegistersCount * 2;
+            int startIndex = registersCount * 2;
 
             int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null);
 
@@ -790,12 +792,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             return _operationNodes[position / InstructionGap];
         }
 
-        private void NumberLocals(ControlFlowGraph cfg)
+        private void NumberLocals(ControlFlowGraph cfg, int registersCount)
         {
             _operationNodes = new List<(IntrusiveList<Operation>, Operation)>();
             _intervals = new List<LiveInterval>();
 
-            for (int index = 0; index < RegistersCount; index++)
+            for (int index = 0; index < registersCount; index++)
             {
                 _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer)));
                 _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector)));
@@ -1041,6 +1043,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                     {
                         LiveInterval interval = _intervals[GetOperandId(dest)];
 
+                        if (interval.IsFixed)
+                        {
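+                            // Only fixed intervals that are actually used get their register
+                            // reported in the used-register masks below.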
+                            interval.IsFixedAndUsed = true;
+                        }
+
                         interval.SetStart(operationPos + 1);
                         interval.AddUsePosition(operationPos + 1);
                     }
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
index 77ad954163..d739ad281f 100644
--- a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
@@ -27,6 +27,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             public Register Register;
 
             public bool IsFixed;
+            public bool IsFixedAndUsed;
         }
 
         private readonly Data* _data;
@@ -44,6 +45,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
         public ref int SpillOffset => ref _data->SpillOffset;
 
         public bool IsFixed => _data->IsFixed;
+        public ref bool IsFixedAndUsed => ref _data->IsFixedAndUsed;
         public bool IsEmpty => FirstRange == default;
         public bool IsSplit => Children.Count != 0;
         public bool IsSpilled => SpillOffset != -1;
@@ -114,7 +116,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             }
             else
             {
-                FirstRange = new LiveRange(position, position + 1); 
+                FirstRange = new LiveRange(position, position + 1);
                 End = position + 1;
             }
         }
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
index 5b11aac204..bc948f95f1 100644
--- a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
+++ b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
@@ -11,6 +11,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
         public int VecCallerSavedRegisters { get; }
         public int IntCalleeSavedRegisters { get; }
         public int VecCalleeSavedRegisters { get; }
+        public int RegistersCount { get; }
 
         public RegisterMasks(
             int intAvailableRegisters,
@@ -18,7 +19,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             int intCallerSavedRegisters,
             int vecCallerSavedRegisters,
             int intCalleeSavedRegisters,
-            int vecCalleeSavedRegisters)
+            int vecCalleeSavedRegisters,
+            int registersCount)
         {
             IntAvailableRegisters   = intAvailableRegisters;
             VecAvailableRegisters   = vecAvailableRegisters;
@@ -26,6 +28,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             VecCallerSavedRegisters = vecCallerSavedRegisters;
             IntCalleeSavedRegisters = intCalleeSavedRegisters;
             VecCalleeSavedRegisters = vecCalleeSavedRegisters;
+            RegistersCount          = registersCount;
         }
 
         public int GetAvailableRegisters(RegisterType type)
diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
index e589da140a..8b5a3fc577 100644
--- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs
+++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -16,6 +16,7 @@ namespace ARMeilleure.CodeGen.X86
 {
     static class CodeGenerator
     {
+        private const int RegistersCount = 16;
         private const int PageSize       = 0x1000;
         private const int StackGuardSize = 0x2000;
 
@@ -143,7 +144,8 @@ namespace ARMeilleure.CodeGen.X86
                 CallingConvention.GetIntCallerSavedRegisters(),
                 CallingConvention.GetVecCallerSavedRegisters(),
                 CallingConvention.GetIntCalleeSavedRegisters(),
-                CallingConvention.GetVecCalleeSavedRegisters());
+                CallingConvention.GetVecCalleeSavedRegisters(),
+                RegistersCount);
 
             AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
 
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
index 6407a9a7b4..8c909ac13d 100644
--- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -5,8 +5,6 @@ namespace ARMeilleure.CodeGen.X86
 {
     static class IntrinsicTable
     {
-        private const int BadOp = 0;
-
         private static IntrinsicInfo[] _intrinTable;
 
         static IntrinsicTable()
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index b91c522ec9..3e65db23d9 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -21,22 +21,47 @@ namespace ARMeilleure.Instructions
     {
         public static void Abs_S(ArmEmitterContext context)
         {
-            EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1));
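+            // On ARM64 hosts, emit the equivalent AdvSimd instruction directly
+            // instead of the generic IR fallback.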
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AbsS);
+            }
+            else
+            {
+                EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            }
         }
 
         public static void Abs_V(ArmEmitterContext context)
         {
-            EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AbsV);
+            }
+            else
+            {
+                EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            }
         }
 
         public static void Add_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64AddS);
+            }
+            else
+            {
+                EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+            }
         }
 
         public static void Add_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -62,24 +87,42 @@ namespace ARMeilleure.Instructions
 
         public static void Addhn_V(ArmEmitterContext context)
         {
-            EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64AddhnV);
+            }
+            else
+            {
+                EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false);
+            }
         }
 
         public static void Addp_S(ArmEmitterContext context)
         {
-            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AddpS);
+            }
+            else
+            {
+                OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
-            Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
-            Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size);
+                Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+                Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size);
 
-            Operand res = context.Add(ne0, ne1);
+                Operand res = context.Add(ne0, ne1);
 
-            context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size));
+                context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size));
+            }
         }
 
         public static void Addp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddpV);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitSsse3VectorPairwiseOp(context, X86PaddInstruction);
             }
@@ -91,68 +134,89 @@ namespace ARMeilleure.Instructions
 
         public static void Addv_V(ArmEmitterContext context)
         {
-            EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AddvV);
+            }
+            else
+            {
+                EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+            }
         }
 
         public static void Cls_V(ArmEmitterContext context)
         {
-            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
-
-            Operand res = context.VectorZero();
-
-            int elems = op.GetBytesCount() >> op.Size;
-
-            int eSize = 8 << op.Size;
-
-            for (int index = 0; index < elems; index++)
+            if (Optimizations.UseAdvSimd)
             {
-                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
-
-                Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize));
-
-                res = EmitVectorInsert(context, res, de, index, op.Size);
-            }
-
-            context.Copy(GetVec(op.Rd), res);
-        }
-
-        public static void Clz_V(ArmEmitterContext context)
-        {
-            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
-
-            int eSize = 8 << op.Size;
-
-            Operand res = eSize switch {
-                8  => Clz_V_I8 (context, GetVec(op.Rn)),
-                16 => Clz_V_I16(context, GetVec(op.Rn)),
-                32 => Clz_V_I32(context, GetVec(op.Rn)),
-                _  => default
-            };
-
-            if (res != default)
-            {
-                if (op.RegisterSize == RegisterSize.Simd64)
-                {
-                    res = context.VectorZeroUpper64(res);
-                }
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClsV);
             }
             else
             {
+                OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+                Operand res = context.VectorZero();
+
                 int elems = op.GetBytesCount() >> op.Size;
 
-                res = context.VectorZero();
+                int eSize = 8 << op.Size;
 
                 for (int index = 0; index < elems; index++)
                 {
                     Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
 
-                    Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize));
+                    Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize));
 
                     res = EmitVectorInsert(context, res, de, index, op.Size);
                 }
-            }
 
-            context.Copy(GetVec(op.Rd), res);
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+
+        public static void Clz_V(ArmEmitterContext context)
+        {
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClzV);
+            }
+            else
+            {
+                OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+                int eSize = 8 << op.Size;
+
+                Operand res = eSize switch {
+                    8  => Clz_V_I8 (context, GetVec(op.Rn)),
+                    16 => Clz_V_I16(context, GetVec(op.Rn)),
+                    32 => Clz_V_I32(context, GetVec(op.Rn)),
+                    _  => default
+                };
+
+                if (res != default)
+                {
+                    if (op.RegisterSize == RegisterSize.Simd64)
+                    {
+                        res = context.VectorZeroUpper64(res);
+                    }
+                }
+                else
+                {
+                    int elems = op.GetBytesCount() >> op.Size;
+
+                    res = context.VectorZero();
+
+                    for (int index = 0; index < elems; index++)
+                    {
+                        Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+                        Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize));
+
+                        res = EmitVectorInsert(context, res, de, index, op.Size);
+                    }
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
         }
 
         private static Operand Clz_V_I8(ArmEmitterContext context, Operand arg)
@@ -271,36 +335,47 @@ namespace ARMeilleure.Instructions
 
         public static void Cnt_V(ArmEmitterContext context)
         {
-            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
-
-            Operand res = context.VectorZero();
-
-            int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
-
-            for (int index = 0; index < elems; index++)
+            if (Optimizations.UseAdvSimd)
             {
-                Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
-
-                Operand de;
-
-                if (Optimizations.UsePopCnt)
-                {
-                    de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne);
-                }
-                else
-                {
-                    de = EmitCountSetBits8(context, ne);
-                }
-
-                res = EmitVectorInsert(context, res, de, index, 0);
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64CntV);
             }
+            else
+            {
+                OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
-            context.Copy(GetVec(op.Rd), res);
+                Operand res = context.VectorZero();
+
+                int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+                for (int index = 0; index < elems; index++)
+                {
+                    Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
+
+                    Operand de;
+
+                    if (Optimizations.UsePopCnt)
+                    {
+                        de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne);
+                    }
+                    else
+                    {
+                        de = EmitCountSetBits8(context, ne);
+                    }
+
+                    res = EmitVectorInsert(context, res, de, index, 0);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
         }
 
         public static void Fabd_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FabdS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -336,7 +411,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fabd_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FabdV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -377,7 +456,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fabs_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FabsS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -405,7 +488,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fabs_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FabsV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -440,7 +527,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fadd_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
             }
@@ -459,7 +550,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fadd_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
             }
@@ -478,7 +573,11 @@ namespace ARMeilleure.Instructions
 
         public static void Faddp_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FaddpS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse3)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -506,7 +605,11 @@ namespace ARMeilleure.Instructions
 
         public static void Faddp_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
                 {
@@ -534,7 +637,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fdiv_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FdivS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
             }
@@ -553,7 +660,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fdiv_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FdivV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd);
             }
@@ -572,7 +683,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmadd_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -607,7 +722,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmax_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
                 {
@@ -628,7 +747,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmax_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
                 {
@@ -649,7 +772,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxnm_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxnmS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true);
             }
@@ -664,7 +791,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxnm_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false);
             }
@@ -679,7 +810,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxnmp_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxnmpS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
                 {
@@ -697,7 +832,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxnmp_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
                 {
@@ -715,7 +854,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxnmv_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxnmvV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
                 {
@@ -733,7 +876,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxp_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
                 {
@@ -757,7 +904,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxv_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxvV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
                 {
@@ -781,7 +932,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmin_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
                 {
@@ -802,7 +957,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmin_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
                 {
@@ -823,7 +982,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminnm_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminnmS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true);
             }
@@ -838,7 +1001,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminnm_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false);
             }
@@ -853,7 +1020,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminnmp_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminnmpS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
                 {
@@ -871,7 +1042,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminnmp_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
                 {
@@ -889,7 +1064,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminnmv_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminnmvV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
                 {
@@ -907,7 +1086,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminp_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
                 {
@@ -931,7 +1114,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminv_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminvV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
                 {
@@ -955,15 +1142,26 @@ namespace ARMeilleure.Instructions
 
         public static void Fmla_Se(ArmEmitterContext context) // Fused.
         {
-            EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaSe);
+            }
+            else
+            {
+                EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Fmla_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
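+                // FMLA accumulates into Rd, so the Rd helper variant threads the destination in as an extra source.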
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlaV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1006,7 +1204,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmla_Ve(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaVe);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
 
@@ -1055,15 +1257,26 @@ namespace ARMeilleure.Instructions
 
         public static void Fmls_Se(ArmEmitterContext context) // Fused.
         {
-            EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Subtract(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsSe);
+            }
+            else
+            {
+                EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+                {
+                    return context.Subtract(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Fmls_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1106,7 +1319,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmls_Ve(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsVe);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
 
@@ -1155,7 +1372,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmsub_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1190,7 +1411,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmul_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
             }
@@ -1209,12 +1434,23 @@ namespace ARMeilleure.Instructions
 
         public static void Fmul_Se(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
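+                // By-element form: multiplies by a single lane of Rm, with the lane index taken from the decoded opcode.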
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulSe);
+            }
+            else
+            {
+                EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
 
         public static void Fmul_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
             }
@@ -1233,7 +1469,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmul_Ve(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulVe);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
 
@@ -1283,39 +1523,71 @@ namespace ARMeilleure.Instructions
 
         public static void Fmulx_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpF(context, (op1, op2) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
-            });
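+                // FMULX has no SSE equivalent; previously this always went through the SoftFloat32.FPMulX helper.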
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulxS);
+            }
+            else
+            {
+                EmitScalarBinaryOpF(context, (op1, op2) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+                });
+            }
         }
 
         public static void Fmulx_Se(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpByElemF(context, (op1, op2) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
-            });
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulxSe);
+            }
+            else
+            {
+                EmitScalarBinaryOpByElemF(context, (op1, op2) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+                });
+            }
         }
 
         public static void Fmulx_V(ArmEmitterContext context)
         {
-            EmitVectorBinaryOpF(context, (op1, op2) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
-            });
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulxV);
+            }
+            else
+            {
+                EmitVectorBinaryOpF(context, (op1, op2) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+                });
+            }
         }
 
         public static void Fmulx_Ve(ArmEmitterContext context)
         {
-            EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
-            });
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulxVe);
+            }
+            else
+            {
+                EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+                });
+            }
         }
 
         public static void Fneg_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FnegS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -1344,7 +1616,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fneg_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FnegV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -1380,7 +1656,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fnmadd_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1423,7 +1703,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fnmsub_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1466,7 +1750,14 @@ namespace ARMeilleure.Instructions
 
         public static void Fnmul_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FnmulS);
+            }
+            else
+            {
+                EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+            }
         }
 
         public static void Frecpe_S(ArmEmitterContext context)
@@ -1475,7 +1766,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpeS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
             {
                 Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true);
 
@@ -1496,7 +1791,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrecpeV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
             {
                 Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false);
 
@@ -1518,7 +1817,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frecps_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrecpsS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1561,7 +1864,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frecps_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrecpsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1609,15 +1916,26 @@ namespace ARMeilleure.Instructions
 
         public static void Frecpx_S(ArmEmitterContext context)
         {
-            EmitScalarUnaryOpF(context, (op1) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1);
-            });
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpxS);
+            }
+            else
+            {
+                EmitScalarUnaryOpF(context, (op1) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1);
+                });
+            }
         }
 
         public static void Frinta_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
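+                // Each FRINT* handler maps to the host rounding instruction with the matching rounding mode.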
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintaS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearestAway);
             }
@@ -1632,7 +1950,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frinta_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintaV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearestAway);
             }
@@ -1647,23 +1969,41 @@ namespace ARMeilleure.Instructions
 
         public static void Frinti_S(ArmEmitterContext context)
         {
-            EmitScalarUnaryOpF(context, (op1) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitRoundByRMode(context, op1);
-            });
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintiS);
+            }
+            else
+            {
+                EmitScalarUnaryOpF(context, (op1) =>
+                {
+                    return EmitRoundByRMode(context, op1);
+                });
+            }
         }
 
         public static void Frinti_V(ArmEmitterContext context)
         {
-            EmitVectorUnaryOpF(context, (op1) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitRoundByRMode(context, op1);
-            });
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintiV);
+            }
+            else
+            {
+                EmitVectorUnaryOpF(context, (op1) =>
+                {
+                    return EmitRoundByRMode(context, op1);
+                });
+            }
         }
 
         public static void Frintm_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintmS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
             }
@@ -1678,7 +2018,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintm_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintmV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
             }
@@ -1693,7 +2037,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintn_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintnS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest);
             }
@@ -1708,7 +2056,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintn_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintnV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest);
             }
@@ -1723,7 +2075,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintp_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintpS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
             }
@@ -1738,7 +2094,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintpV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
             }
@@ -1753,6 +2113,7 @@ namespace ARMeilleure.Instructions
 
         public static void Frintx_S(ArmEmitterContext context)
         {
+            // TODO Arm64: Fast path. Should we set host FPCR?
             EmitScalarUnaryOpF(context, (op1) =>
             {
                 return EmitRoundByRMode(context, op1);
@@ -1761,6 +2122,7 @@ namespace ARMeilleure.Instructions
 
         public static void Frintx_V(ArmEmitterContext context)
         {
+            // TODO Arm64: Fast path. Should we set host FPCR?
             EmitVectorUnaryOpF(context, (op1) =>
             {
                 return EmitRoundByRMode(context, op1);
@@ -1769,7 +2131,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintz_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintzS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero);
             }
@@ -1784,7 +2150,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintz_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintzV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero);
             }
@@ -1803,7 +2173,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrsqrteS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
             {
                 Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true);
 
@@ -1824,7 +2198,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrsqrteV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
             {
                 Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false);
 
@@ -1846,7 +2224,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frsqrts_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrsqrtsS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1895,7 +2277,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frsqrts_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrsqrtsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1949,7 +2335,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fsqrt_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FsqrtS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
             }
@@ -1964,7 +2354,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fsqrt_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FsqrtV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd);
             }
@@ -1979,7 +2373,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fsub_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
             }
@@ -1998,7 +2396,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fsub_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FsubV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
             }
@@ -2017,7 +2419,11 @@ namespace ARMeilleure.Instructions
 
         public static void Mla_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlaV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorMul_AddSub(context, AddSub.Add);
             }
@@ -2032,15 +2438,26 @@ namespace ARMeilleure.Instructions
 
         public static void Mla_Ve(ArmEmitterContext context)
         {
-            EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlaVe);
+            }
+            else
+            {
+                EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Mls_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlsV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorMul_AddSub(context, AddSub.Subtract);
             }
@@ -2055,15 +2472,26 @@ namespace ARMeilleure.Instructions
 
         public static void Mls_Ve(ArmEmitterContext context)
         {
-            EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Subtract(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlsVe);
+            }
+            else
+            {
+                EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+                {
+                    return context.Subtract(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Mul_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64MulV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorMul_AddSub(context, AddSub.None);
             }
@@ -2075,17 +2503,35 @@ namespace ARMeilleure.Instructions
 
         public static void Mul_Ve(ArmEmitterContext context)
         {
-            EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64MulVe);
+            }
+            else
+            {
+                EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
 
         public static void Neg_S(ArmEmitterContext context)
         {
-            EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1));
+            if (Optimizations.UseAdvSimd)
+            {
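+                // Integer variant; the helpers without the F suffix carry no floating-point state.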
+                InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64NegS);
+            }
+            else
+            {
+                EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1));
+            }
         }
 
         public static void Neg_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64NegV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -2110,7 +2556,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UsePclmulqdq && op.Size == 3)
+            if (Optimizations.UseAdvSimd && false) // Not supported by all Arm CPUs.
+            {
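+                // PMULL on the 64-bit element size belongs to the optional Armv8 crypto extension, hence the hard-disable above (presumably until a capability check is added).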
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV);
+            }
+            else if (Optimizations.UsePclmulqdq && op.Size == 3)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -2214,33 +2664,65 @@ namespace ARMeilleure.Instructions
 
         public static void Raddhn_V(ArmEmitterContext context)
         {
-            EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true);
+            if (Optimizations.UseAdvSimd)
+            {
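+                // The RADDHN2 form writes the upper half of Rd while preserving the lower, so Rd is passed as a source (TernaryOpRd).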
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RaddhnV);
+            }
+            else
+            {
+                EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true);
+            }
         }
 
         public static void Rsubhn_V(ArmEmitterContext context)
         {
-            EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RsubhnV);
+            }
+            else
+            {
+                EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true);
+            }
         }
 
         public static void Saba_V(ArmEmitterContext context)
         {
-            EmitVectorTernaryOpSx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabaV);
+            }
+            else
+            {
+                EmitVectorTernaryOpSx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+                });
+            }
         }
 
         public static void Sabal_V(ArmEmitterContext context)
         {
-            EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabalV);
+            }
+            else
+            {
+                EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+                });
+            }
         }
 
         public static void Sabd_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2262,7 +2744,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdlV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -2293,12 +2779,23 @@ namespace ARMeilleure.Instructions
 
         public static void Sadalp_V(ArmEmitterContext context)
         {
-            EmitAddLongPairwise(context, signed: true, accumulate: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64SadalpV);
+            }
+            else
+            {
+                EmitAddLongPairwise(context, signed: true, accumulate: true);
+            }
         }
 
         public static void Saddl_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddlV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2328,17 +2825,35 @@ namespace ARMeilleure.Instructions
 
         public static void Saddlp_V(ArmEmitterContext context)
         {
-            EmitAddLongPairwise(context, signed: true, accumulate: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlpV);
+            }
+            else
+            {
+                EmitAddLongPairwise(context, signed: true, accumulate: false);
+            }
         }
 
         public static void Saddlv_V(ArmEmitterContext context)
         {
-            EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlvV);
+            }
+            else
+            {
+                EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2));
+            }
         }
 
         public static void Saddw_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddwV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2368,7 +2883,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShaddV);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -2404,7 +2923,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShsubV);
+            }
+            else if (Optimizations.UseSse2 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -2442,7 +2965,11 @@ namespace ARMeilleure.Instructions
 
         public static void Smax_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2468,7 +2995,11 @@ namespace ARMeilleure.Instructions
 
         public static void Smaxp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxpV);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitSsse3VectorPairwiseOp(context, X86PmaxsInstruction);
             }
@@ -2480,12 +3011,23 @@ namespace ARMeilleure.Instructions
 
         public static void Smaxv_V(ArmEmitterContext context)
         {
-            EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SmaxvV);
+            }
+            else
+            {
+                EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+            }
         }
 
         public static void Smin_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2511,7 +3053,11 @@ namespace ARMeilleure.Instructions
 
         public static void Sminp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminpV);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitSsse3VectorPairwiseOp(context, X86PminsInstruction);
             }
@@ -2523,14 +3069,25 @@ namespace ARMeilleure.Instructions
 
         public static void Sminv_V(ArmEmitterContext context)
         {
-            EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SminvV);
+            }
+            else
+            {
+                EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+            }
         }
 
         public static void Smlal_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlalV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand d = GetVec(op.Rd);
                 Operand n = GetVec(op.Rn);
@@ -2566,17 +3123,28 @@ namespace ARMeilleure.Instructions
 
         public static void Smlal_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlalVe);
+            }
+            else
+            {
+                EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Smlsl_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlslV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand d = GetVec(op.Rd);
                 Operand n = GetVec(op.Rn);
@@ -2612,117 +3180,268 @@ namespace ARMeilleure.Instructions
 
         public static void Smlsl_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Subtract(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlslVe);
+            }
+            else
+            {
+                EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+                {
+                    return context.Subtract(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Smull_V(ArmEmitterContext context)
         {
-            EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
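+                // Widening multiply maps to a single host instruction instead of the element-by-element IR expansion below.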
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmullV);
+            }
+            else
+            {
+                EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
 
         public static void Smull_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64SmullVe);
+            }
+            else
+            {
+                EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
 
         public static void Sqabs_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            if (Optimizations.UseAdvSimd)
+            {
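+                // SQABS sets the sticky FPSR.QC flag on saturation; the dedicated saturating helpers are used, presumably so that state reaches the guest FPSR.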
+                InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqabsS);
+            }
+            else
+            {
+                EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            }
         }
 
         public static void Sqabs_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqabsV);
+            }
+            else
+            {
+                EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            }
         }
 
         public static void Sqadd_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqaddS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+            }
         }
 
         public static void Sqadd_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqaddV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+            }
         }
 
         public static void Sqdmulh_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            }
         }
 
         public static void Sqdmulh_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            }
         }
 
         public static void Sqdmulh_Ve(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqdmulhVe);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            }
         }
 
         public static void Sqneg_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqnegS);
+            }
+            else
+            {
+                EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+            }
         }
 
         public static void Sqneg_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqnegV);
+            }
+            else
+            {
+                EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+            }
         }
 
         public static void Sqrdmulh_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            }
         }
 
         public static void Sqrdmulh_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            }
         }
 
         public static void Sqrdmulh_Ve(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqrdmulhVe);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            }
         }
 
         public static void Sqsub_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqsubS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+            }
         }
 
         public static void Sqsub_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqsubV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+            }
         }
 
         public static void Sqxtn_S(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
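+            // The *Rd helpers also pass the destination register as a source, since these
+            // instructions read it (the narrowing "2" forms preserve the untouched half,
+            // and SUQADD/USQADD accumulate into it).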
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnS);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
+            }
         }
 
         public static void Sqxtn_V(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnV);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
+            }
         }
 
         public static void Sqxtun_S(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunS);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
+            }
         }
 
         public static void Sqxtun_V(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunV);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
+            }
         }
 
         public static void Srhadd_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size < 2)
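+            // Check AdvSimd first; the existing SSE paths remain as fallbacks on x86 hosts.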
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrhaddV);
+            }
+            else if (Optimizations.UseSse2 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -2764,7 +3483,11 @@ namespace ARMeilleure.Instructions
 
         public static void Ssubl_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsublV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2794,7 +3517,11 @@ namespace ARMeilleure.Instructions
 
         public static void Ssubw_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsubwV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2822,12 +3549,23 @@ namespace ARMeilleure.Instructions
 
         public static void Sub_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SubS);
+            }
+            else
+            {
+                EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+            }
         }
 
         public static void Sub_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SubV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2853,38 +3591,77 @@ namespace ARMeilleure.Instructions
 
         public static void Subhn_V(ArmEmitterContext context)
         {
-            EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SubhnV);
+            }
+            else
+            {
+                EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false);
+            }
         }
 
         public static void Suqadd_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+            }
         }
 
         public static void Suqadd_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+            }
         }
 
         public static void Uaba_V(ArmEmitterContext context)
         {
-            EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabaV);
+            }
+            else
+            {
+                EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+                });
+            }
         }
 
         public static void Uabal_V(ArmEmitterContext context)
         {
-            EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabalV);
+            }
+            else
+            {
+                EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+                });
+            }
         }
 
         public static void Uabd_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2906,7 +3683,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdlV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -2937,12 +3718,23 @@ namespace ARMeilleure.Instructions
 
         public static void Uadalp_V(ArmEmitterContext context)
         {
-            EmitAddLongPairwise(context, signed: false, accumulate: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64UadalpV);
+            }
+            else
+            {
+                EmitAddLongPairwise(context, signed: false, accumulate: true);
+            }
         }
 
         public static void Uaddl_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddlV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2972,17 +3764,35 @@ namespace ARMeilleure.Instructions
 
         public static void Uaddlp_V(ArmEmitterContext context)
         {
-            EmitAddLongPairwise(context, signed: false, accumulate: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlpV);
+            }
+            else
+            {
+                EmitAddLongPairwise(context, signed: false, accumulate: false);
+            }
         }
 
         public static void Uaddlv_V(ArmEmitterContext context)
         {
-            EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
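+            // Across-lane reductions take a single source vector, hence the unary helper.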
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlvV);
+            }
+            else
+            {
+                EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+            }
         }
 
         public static void Uaddw_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddwV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -3012,7 +3822,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhaddV);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -3048,7 +3862,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhsubV);
+            }
+            else if (Optimizations.UseSse2 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -3079,7 +3897,11 @@ namespace ARMeilleure.Instructions
 
         public static void Umax_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -3105,7 +3927,11 @@ namespace ARMeilleure.Instructions
 
         public static void Umaxp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxpV);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitSsse3VectorPairwiseOp(context, X86PmaxuInstruction);
             }
@@ -3117,12 +3943,23 @@ namespace ARMeilleure.Instructions
 
         public static void Umaxv_V(ArmEmitterContext context)
         {
-            EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UmaxvV);
+            }
+            else
+            {
+                EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+            }
         }
 
         public static void Umin_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -3148,7 +3985,11 @@ namespace ARMeilleure.Instructions
 
         public static void Uminp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminpV);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitSsse3VectorPairwiseOp(context, X86PminuInstruction);
             }
@@ -3160,14 +4001,25 @@ namespace ARMeilleure.Instructions
 
         public static void Uminv_V(ArmEmitterContext context)
         {
-            EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UminvV);
+            }
+            else
+            {
+                EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+            }
         }
 
         public static void Umlal_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlalV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand d = GetVec(op.Rd);
                 Operand n = GetVec(op.Rn);
@@ -3203,17 +4055,28 @@ namespace ARMeilleure.Instructions
 
         public static void Umlal_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlalVe);
+            }
+            else
+            {
+                EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Umlsl_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlslV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand d = GetVec(op.Rd);
                 Operand n = GetVec(op.Rn);
@@ -3249,57 +4112,124 @@ namespace ARMeilleure.Instructions
 
         public static void Umlsl_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Subtract(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlslVe);
+            }
+            else
+            {
+                EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+                {
+                    return context.Subtract(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Umull_V(ArmEmitterContext context)
         {
-            EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmullV);
+            }
+            else
+            {
+                EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
 
         public static void Umull_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64UmullVe);
+            }
+            else
+            {
+                EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
 
         public static void Uqadd_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqaddS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+            }
         }
 
         public static void Uqadd_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqaddV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+            }
         }
 
         public static void Uqsub_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqsubS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+            }
         }
 
         public static void Uqsub_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqsubV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+            }
         }
 
         public static void Uqxtn_S(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnS);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx);
+            }
         }
 
         public static void Uqxtn_V(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnV);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx);
+            }
         }
 
         public static void Urhadd_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrhaddV);
+            }
+            else if (Optimizations.UseSse2 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -3330,17 +4260,35 @@ namespace ARMeilleure.Instructions
 
         public static void Usqadd_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+            }
         }
 
         public static void Usqadd_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+            }
         }
 
         public static void Usubl_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsublV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -3370,7 +4318,11 @@ namespace ARMeilleure.Instructions
 
         public static void Usubw_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsubwV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index 79b376e959..a9994e4121 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -2,6 +2,7 @@
 using ARMeilleure.IntermediateRepresentation;
 using ARMeilleure.Translation;
 using System;
+
 using static ARMeilleure.Instructions.InstEmitFlowHelper;
 using static ARMeilleure.Instructions.InstEmitHelper;
 using static ARMeilleure.Instructions.InstEmitSimdHelper;
@@ -30,7 +31,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
 
-            if (Optimizations.FastFP && Optimizations.UseSse2)
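+            // AArch32 SIMD maps onto the equivalent AArch64 instruction via the 32-bit helper.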
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FabsS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarUnaryOpSimd32(context, (m) =>
                 {
@@ -49,7 +54,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FabsV);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                 {
                     EmitVectorUnaryOpSimd32(context, (m) =>
                     {
@@ -76,7 +85,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vadd_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
             }
@@ -92,7 +105,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vadd_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FaddV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF32(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
             }
@@ -280,7 +297,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfma_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
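+            // Fused multiply-add is baseline AArch64, so unlike x86 no separate FMA capability check is needed.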
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd);
             }
@@ -299,7 +320,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfma_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps);
             }
@@ -314,7 +339,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfms_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd);
             }
@@ -333,7 +362,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfms_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps);
             }
@@ -348,7 +381,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfnma_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
             }
@@ -367,7 +404,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfnms_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd);
             }
@@ -419,7 +460,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
 
-            if (Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FnegS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitScalarUnaryOpSimd32(context, (m) =>
                 {
@@ -445,7 +490,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
 
-            if (Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FnmulS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpSimd32(context, (n, m) =>
                 {
@@ -473,7 +522,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
 
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
             }
@@ -498,7 +551,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
 
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
             }
@@ -525,7 +582,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FnegV);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                 {
                     EmitVectorUnaryOpSimd32(context, (m) =>
                     {
@@ -554,7 +615,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vdiv_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FdivS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF32(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
             }
@@ -573,7 +638,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmaxnm_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmaxnmS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF32(context, true, true);
             }
@@ -585,7 +654,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmaxnm_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxnmV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF32(context, true, false);
             }
@@ -597,7 +670,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vminnm_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FminnmS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF32(context, false, true);
             }
@@ -609,7 +686,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vminnm_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminnmV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF32(context, false, false);
             }
@@ -621,7 +702,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmax_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF32(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
             }
@@ -664,7 +749,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmin_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF32(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
             }
@@ -707,7 +796,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmla_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
             }
@@ -730,7 +823,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmla_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
             }
@@ -786,7 +883,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmls_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
             }
@@ -809,7 +910,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmls_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
             }
@@ -865,7 +970,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmul_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmulS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
             }
@@ -884,7 +993,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmul_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmulV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
             }
@@ -975,7 +1088,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vpadd_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FaddpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Addps);
             }
@@ -1008,7 +1125,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vpmax_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FmaxpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps);
             }
@@ -1038,7 +1159,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vpmin_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FminpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps);
             }
@@ -1217,7 +1342,11 @@ namespace ARMeilleure.Instructions
             {
                 int sizeF = op.Size & 1;
 
-                if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
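+                // The native FRECPE handles both element sizes; the sizeF == 0 restriction only applies to the SSE path.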
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrecpeV);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
                 {
                     EmitVectorUnaryOpF32(context, Intrinsic.X86Rcpps, 0);
                 }
@@ -1237,7 +1366,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vrecps(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrecpsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
                 bool single = (op.Size & 1) == 0;
@@ -1304,7 +1437,11 @@ namespace ARMeilleure.Instructions
             {
                 int sizeF = op.Size & 1;
 
-                if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrsqrteV);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
                 {
                     EmitVectorUnaryOpF32(context, Intrinsic.X86Rsqrtps, 0);
                 }
@@ -1324,7 +1461,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vrsqrts(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrsqrtsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
                 bool single = (op.Size & 1) == 0;
@@ -1393,7 +1534,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vsqrt_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FsqrtS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarUnaryOpF32(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
             }
@@ -1408,7 +1553,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vsub_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF32(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
             }
@@ -1420,7 +1569,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vsub_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FsubV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF32(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
             }
diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
index 71055155c7..c32b64ba16 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCmp.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
@@ -466,12 +466,26 @@ namespace ARMeilleure.Instructions
 
         public static void Fcmp_S(ArmEmitterContext context)
         {
-            EmitFcmpOrFcmpe(context, signalNaNs: false);
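+            // The Arm64 helper emits the hardware compare and takes NZCV from it directly,
+            // rather than computing the flags in IR.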
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: false);
+            }
+            else
+            {
+                EmitFcmpOrFcmpe(context, signalNaNs: false);
+            }
         }
 
         public static void Fcmpe_S(ArmEmitterContext context)
         {
-            EmitFcmpOrFcmpe(context, signalNaNs: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: true);
+            }
+            else
+            {
+                EmitFcmpOrFcmpe(context, signalNaNs: true);
+            }
         }
 
         private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
index 339d329399..a990e057d1 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
@@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
     {
         public static void Vceq_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
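+            // The trailing bool selects the compare-with-zero form; Vceq_V compares two registers.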
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, false);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, false);
             }
@@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                 {
                     EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, true);
                 }
@@ -55,7 +63,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vcge_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseAvx)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseAvx)
             {
                 EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
             }
@@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseAvx)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseAvx)
                 {
                     EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
                 }
@@ -95,7 +111,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vcgt_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseAvx)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, false);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseAvx)
             {
                 EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, false);
             }
@@ -118,7 +138,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseAvx)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseAvx)
                 {
                     EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, true);
                 }
@@ -139,7 +163,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                 {
                     EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
                 }
@@ -160,7 +188,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThan, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                 {
                     EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThan, true);
                 }
@@ -247,12 +279,26 @@ namespace ARMeilleure.Instructions
 
         public static void Vcmp(ArmEmitterContext context)
         {
-            EmitVcmpOrVcmpe(context, false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, false);
+            }
+            else
+            {
+                EmitVcmpOrVcmpe(context, false);
+            }
         }
 
         public static void Vcmpe(ArmEmitterContext context)
         {
-            EmitVcmpOrVcmpe(context, true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, true);
+            }
+            else
+            {
+                EmitVcmpOrVcmpe(context, true);
+            }
         }
 
         private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
index 7f61cad41c..652ad397c1 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
@@ -164,7 +164,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtas_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
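+            // The _Gp variants write a general-purpose register, hence the FToGp helper.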
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtasGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
             }
@@ -176,7 +180,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtas_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtasS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
             }
@@ -188,7 +196,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtas_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtasV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
             }
@@ -200,7 +212,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtau_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtauGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
             }
@@ -212,7 +228,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtau_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtauS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
             }
@@ -224,7 +244,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtau_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtauV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
             }
@@ -240,7 +264,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.UseSse2 && sizeF == 1)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtlV);
+            }
+            else if (Optimizations.UseSse2 && sizeF == 1)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -296,7 +324,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtms_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmsGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
             }
@@ -308,7 +340,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtms_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtmsV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false);
             }
@@ -320,7 +356,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtmu_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmuGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
             }
@@ -336,7 +376,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.UseSse2 && sizeF == 1)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpFRd(context, Intrinsic.Arm64FcvtnV);
+            }
+            else if (Optimizations.UseSse2 && sizeF == 1)
             {
                 Operand d = GetVec(op.Rd);
 
@@ -405,7 +449,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtns_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtnsGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false);
             }
@@ -417,7 +465,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtns_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnsS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true);
             }
@@ -429,7 +481,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtns_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnsV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false);
             }
@@ -441,7 +497,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtnu_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnuS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true);
             }
@@ -453,7 +513,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtnu_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnuV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false);
             }
@@ -465,7 +529,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtps_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpsGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
             }
@@ -477,7 +545,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtpu_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpuGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
             }
@@ -489,7 +561,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzs_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzsGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
             }
@@ -501,7 +577,13 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzsGpFixed, op.FBits);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
             }
@@ -513,7 +595,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzs_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzsS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true);
             }
@@ -525,7 +611,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzs_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzsV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
             }
@@ -537,7 +627,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzsVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
             }
@@ -549,7 +643,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzu_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzuGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
             }
@@ -561,7 +659,13 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzuGpFixed, op.FBits);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
             }
@@ -573,7 +677,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzu_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzuS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true);
             }
@@ -585,7 +693,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzu_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzuV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
             }
@@ -597,7 +709,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzuVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
             }
@@ -609,41 +725,59 @@ namespace ARMeilleure.Instructions
 
         public static void Scvtf_Gp(ArmEmitterContext context)
         {
-            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
-
-            Operand res = GetIntOrZR(context, op.Rn);
-
-            if (op.RegisterSize == RegisterSize.Int32)
+            if (Optimizations.UseAdvSimd)
             {
-                res = context.SignExtend32(OperandType.I64, res);
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64ScvtfGp);
             }
+            else
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 
-            res = EmitFPConvert(context, res, op.Size, signed: true);
+                Operand res = GetIntOrZR(context, op.Rn);
 
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+                if (op.RegisterSize == RegisterSize.Int32)
+                {
+                    res = context.SignExtend32(OperandType.I64, res);
+                }
+
+                res = EmitFPConvert(context, res, op.Size, signed: true);
+
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
         }
 
         public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
         {
             OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 
-            Operand res = GetIntOrZR(context, op.Rn);
-
-            if (op.RegisterSize == RegisterSize.Int32)
+            if (Optimizations.UseAdvSimd)
             {
-                res = context.SignExtend32(OperandType.I64, res);
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64ScvtfGpFixed, op.FBits);
             }
+            else
+            {
+                Operand res = GetIntOrZR(context, op.Rn);
 
-            res = EmitFPConvert(context, res, op.Size, signed: true);
+                if (op.RegisterSize == RegisterSize.Int32)
+                {
+                    res = context.SignExtend32(OperandType.I64, res);
+                }
 
-            res = EmitI2fFBitsMul(context, res, op.FBits);
+                res = EmitFPConvert(context, res, op.Size, signed: true);
 
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+                res = EmitI2fFBitsMul(context, res, op.FBits);
+
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
         }
 
         public static void Scvtf_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64ScvtfS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2ScvtfOp(context, scalar: true);
             }
@@ -655,7 +789,11 @@ namespace ARMeilleure.Instructions
 
         public static void Scvtf_S_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64ScvtfSFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2ScvtfOp(context, scalar: true);
             }
@@ -667,7 +805,11 @@ namespace ARMeilleure.Instructions
 
         public static void Scvtf_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64ScvtfV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2ScvtfOp(context, scalar: false);
             }
@@ -679,7 +821,11 @@ namespace ARMeilleure.Instructions
 
         public static void Scvtf_V_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64ScvtfVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2ScvtfOp(context, scalar: false);
             }
@@ -691,31 +837,49 @@ namespace ARMeilleure.Instructions
 
         public static void Ucvtf_Gp(ArmEmitterContext context)
         {
-            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64UcvtfGp);
+            }
+            else
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 
-            Operand res = GetIntOrZR(context, op.Rn);
+                Operand res = GetIntOrZR(context, op.Rn);
 
-            res = EmitFPConvert(context, res, op.Size, signed: false);
+                res = EmitFPConvert(context, res, op.Size, signed: false);
 
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
         }
 
         public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
         {
             OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 
-            Operand res = GetIntOrZR(context, op.Rn);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64UcvtfGpFixed, op.FBits);
+            }
+            else
+            {
+                Operand res = GetIntOrZR(context, op.Rn);
 
-            res = EmitFPConvert(context, res, op.Size, signed: false);
+                res = EmitFPConvert(context, res, op.Size, signed: false);
 
-            res = EmitI2fFBitsMul(context, res, op.FBits);
+                res = EmitI2fFBitsMul(context, res, op.FBits);
 
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
         }
 
         public static void Ucvtf_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64UcvtfS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2UcvtfOp(context, scalar: true);
             }
@@ -727,7 +891,11 @@ namespace ARMeilleure.Instructions
 
         public static void Ucvtf_S_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64UcvtfSFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2UcvtfOp(context, scalar: true);
             }
@@ -739,7 +907,11 @@ namespace ARMeilleure.Instructions
 
         public static void Ucvtf_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64UcvtfV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2UcvtfOp(context, scalar: false);
             }
@@ -751,7 +923,11 @@ namespace ARMeilleure.Instructions
 
         public static void Ucvtf_V_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64UcvtfVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2UcvtfOp(context, scalar: false);
             }
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
index f3f239589e..5fdc3b5ad7 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@@ -59,7 +59,11 @@ namespace ARMeilleure.Instructions
 
             if (toInteger)
             {
-                if (Optimizations.UseSse41)
+                if (Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV);
+                }
+                else if (Optimizations.UseSse41)
                 {
                     EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
                 }
@@ -153,7 +157,28 @@ namespace ARMeilleure.Instructions
                 bool unsigned = (op.Opc2 & 1) == 0;
                 bool roundWithFpscr = op.Opc != 1;
 
-                if (!roundWithFpscr && Optimizations.UseSse41)
+                if (!roundWithFpscr && Optimizations.UseAdvSimd)
+                {
+                    bool doubleSize = floatSize == OperandType.FP64;
+
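+                    // An F64 source narrows to a 32-bit integer result, so convert through a
+                    // general-purpose register and insert the integer back as a scalar.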
+                    if (doubleSize)
+                    {
+                        Operand m = GetVecA32(op.Vm >> 1);
+
+                        Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+                        Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble;
+
+                        Operand asInteger = context.AddIntrinsicInt(inst, toConvert);
+
+                        InsertScalar(context, op.Vd, asInteger);
+                    }
+                    else
+                    {
+                        InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS);
+                    }
+                }
+                else if (!roundWithFpscr && Optimizations.UseSse41)
                 {
                     EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
                 }
@@ -231,7 +256,34 @@ namespace ARMeilleure.Instructions
             bool unsigned = op.Opc == 0;
             int rm = op.Opc2 & 3;
 
-            if (Optimizations.UseSse41)
+            Intrinsic inst;
+
+            if (Optimizations.UseAdvSimd)
+            {
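+                // Opc2 selects the rounding mode: 0b00 ties-to-away (FCVTA*), 0b01 to nearest
+                // even (FCVTN*), 0b10 towards +infinity (FCVTP*), 0b11 towards -infinity (FCVTM*).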
+                if (unsigned)
+                {
+                    inst = rm switch
+                    {
+                        0b00 => Intrinsic.Arm64FcvtauS,
+                        0b01 => Intrinsic.Arm64FcvtnuS,
+                        0b10 => Intrinsic.Arm64FcvtpuS,
+                        0b11 => Intrinsic.Arm64FcvtmuS,
+                        _ => throw new ArgumentOutOfRangeException(nameof(rm))
+                    };
+                }
+                else
+                {
+                    inst = rm switch
+                    {
+                        0b00 => Intrinsic.Arm64FcvtasS,
+                        0b01 => Intrinsic.Arm64FcvtnsS,
+                        0b10 => Intrinsic.Arm64FcvtpsS,
+                        0b11 => Intrinsic.Arm64FcvtmsS,
+                        _ => throw new ArgumentOutOfRangeException(nameof(rm))
+                    };
+                }
+
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
             }
@@ -338,7 +390,19 @@ namespace ARMeilleure.Instructions
 
             int rm = op.Opc2 & 3;
 
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
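+                // Opc2 selects the rounding mode, mirroring FRINT{A,N,P,M}.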
+                Intrinsic inst = rm switch
+                {
+                    0b00 => Intrinsic.Arm64FrintaS,
+                    0b01 => Intrinsic.Arm64FrintnS,
+                    0b10 => Intrinsic.Arm64FrintpS,
+                    0b11 => Intrinsic.Arm64FrintmS,
+                    _ => throw new ArgumentOutOfRangeException(nameof(rm))
+                };
+
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitScalarUnaryOpSimd32(context, (m) =>
                 {
@@ -382,12 +446,9 @@ namespace ARMeilleure.Instructions
         // VRINTA (vector).
         public static void Vrinta_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
             {
-                EmitVectorUnaryOpSimd32(context, (m) =>
-                {
-                    return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: false);
-                });
+                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS);
             }
             else
             {
@@ -398,7 +459,11 @@ namespace ARMeilleure.Instructions
         // VRINTM (vector).
         public static void Vrintm_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorUnaryOpSimd32(context, (m) =>
                 {
@@ -414,7 +479,11 @@ namespace ARMeilleure.Instructions
         // VRINTN (vector).
         public static void Vrintn_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorUnaryOpSimd32(context, (m) =>
                 {
@@ -430,7 +499,11 @@ namespace ARMeilleure.Instructions
         // VRINTP (vector).
         public static void Vrintp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorUnaryOpSimd32(context, (m) =>
                 {
@@ -448,7 +521,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
 
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitScalarUnaryOpSimd32(context, (m) =>
                 {
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
new file mode 100644
index 0000000000..98236be6d6
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
@@ -0,0 +1,366 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+    using Func1I = Func<Operand, Operand>;
+    using Func2I = Func<Operand, Operand, Operand>;
+    using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+    static class InstEmitSimdHelper32Arm64
+    {
+        // Intrinsic Helpers
+
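+        // AArch32 maps two D registers onto each AArch64 Q register (e.g. D3 is the high
+        // half of Q1), so the low bit of a V register number selects the half. This moves
+        // the source doubleword onto the same side that the target register occupies.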
+        public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
+        {
+            Debug.Assert(input.Type == OperandType.V128);
+
+            int originalSide = originalV & 1;
+            int targetSide = targetV & 1;
+
+            if (originalSide == targetSide)
+            {
+                return input;
+            }
+
+            Intrinsic vType = Intrinsic.Arm64VDWord | Intrinsic.Arm64V128;
+
+            if (targetSide == 1)
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 0)); // Low to high.
+            }
+            else
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 1)); // High to low.
+            }
+        }
+
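+        // Replaces only the doubleword of 'target' that corresponds to the destination D
+        // register, leaving the other half of the Q register intact.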
+        public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
+        {
+            Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+            int targetSide = targetV & 1;
+            Operand idx = Const(targetSide);
+
+            return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, idx, value, idx);
+        }
+
+        public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
+        {
+            Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+            // Insert from index 0 in value to index in target.
+            int index = reg & (doubleWidth ? 1 : 3);
+
+            if (doubleWidth)
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, Const(index), value, Const(0));
+            }
+            else
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VWord, target, Const(index), value, Const(0));
+            }
+        }
+
+        public static Operand EmitExtractScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
+        {
+            int index = reg & (doubleWidth ? 1 : 3);
+
+            if (index == 0)
+            {
+                // Element is already at index 0, so just return the vector directly.
+                return target;
+            }
+
+            if (doubleWidth)
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VDWord, target, Const(1)); // Extract high (index 1).
+            }
+            else
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VWord, target, Const(index)); // Extract element at index.
+            }
+        }
+
+        // Vector Operand Templates
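+        //
+        // These templates emit full-width host operations. For D-register (!op.Q) guest
+        // ops, operands are first moved onto the destination's half of the Q register and
+        // the result is inserted back, preserving the untouched half of Qd.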
+
+        public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            Operand m = GetVecA32(op.Qm);
+            Operand d = GetVecA32(op.Qd);
+
+            if (!op.Q) // Register swap: move relevant doubleword to destination side.
+            {
+                m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+            }
+
+            Operand res = vectorFunc(m);
+
+            if (!op.Q) // Register insert.
+            {
+                res = EmitDoubleWordInsert(context, d, res, op.Vd);
+            }
+
+            context.Copy(d, res);
+        }
+
+        public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
+        }
+
+        public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            Operand n = GetVecA32(op.Qn);
+            Operand m = GetVecA32(op.Qm);
+            Operand d = GetVecA32(op.Qd);
+
+            if (side == -1)
+            {
+                side = op.Vd;
+            }
+
+            if (!op.Q) // Register swap: move relevant doubleword to destination side.
+            {
+                n = EmitMoveDoubleWordToSide(context, n, op.Vn, side);
+                m = EmitMoveDoubleWordToSide(context, m, op.Vm, side);
+            }
+
+            Operand res = vectorFunc(n, m);
+
+            if (!op.Q) // Register insert.
+            {
+                if (side != op.Vd)
+                {
+                    res = EmitMoveDoubleWordToSide(context, res, side, op.Vd);
+                }
+                res = EmitDoubleWordInsert(context, d, res, op.Vd);
+            }
+
+            context.Copy(d, res);
+        }
+
+        public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            Operand n = GetVecA32(op.Qn);
+            Operand m = GetVecA32(op.Qm);
+            Operand d = GetVecA32(op.Qd);
+            Operand initialD = d;
+
+            if (!op.Q) // Register swap: move relevant doubleword to destination side.
+            {
+                n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+                m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+            }
+
+            Operand res = vectorFunc(d, n, m);
+
+            if (!op.Q) // Register insert.
+            {
+                res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
+            }
+
+            context.Copy(initialD, res);
+        }
+
+        public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
+        }
+
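+        // Scalar Operand Templates
+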
+        public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
+        {
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+            bool doubleSize = (op.Size & 1) != 0;
+            int shift = doubleSize ? 1 : 2;
+            Operand m = GetVecA32(op.Vm >> shift);
+            Operand d = GetVecA32(op.Vd >> shift);
+
+            m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+            Operand res = scalarFunc(m);
+
+            // Insert scalar into vector.
+            res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+            context.Copy(d, res);
+        }
+
+        public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+            // A zero intrinsic requests a pure move; capture that before OR'ing in the size
+            // and width flags, which would otherwise make the pass-through check unreachable.
+            bool isPassthrough = inst == 0;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitScalarUnaryOpSimd32(context, (m) => isPassthrough ? m : context.AddIntrinsic(inst, m));
+        }
+
+        public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
+        {
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+            bool doubleSize = (op.Size & 1) != 0;
+            int shift = doubleSize ? 1 : 2;
+            Operand n = GetVecA32(op.Vn >> shift);
+            Operand m = GetVecA32(op.Vm >> shift);
+            Operand d = GetVecA32(op.Vd >> shift);
+
+            n = EmitExtractScalar(context, n, op.Vn, doubleSize);
+            m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+            Operand res = scalarFunc(n, m);
+
+            // Insert scalar into vector.
+            res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+            context.Copy(d, res);
+        }
+
+        public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
+        {
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+            bool doubleSize = (op.Size & 1) != 0;
+            int shift = doubleSize ? 1 : 2;
+            Operand n = GetVecA32(op.Vn >> shift);
+            Operand m = GetVecA32(op.Vm >> shift);
+            Operand d = GetVecA32(op.Vd >> shift);
+            Operand initialD = d;
+
+            n = EmitExtractScalar(context, n, op.Vn, doubleSize);
+            m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+            d = EmitExtractScalar(context, d, op.Vd, doubleSize);
+
+            Operand res = scalarFunc(d, n, m);
+
+            // Insert scalar into vector.
+            res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);
+
+            context.Copy(initialD, res);
+        }
+
+        public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitScalarTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
+        }
+
+        // Pairwise
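+        // (always run as 64-bit host vector ops on the low doubleword, side 0)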
+
+        public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            inst32 |= Intrinsic.Arm64V64 | Intrinsic.Arm64VFloat;
+            EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst32, n, m), 0);
+        }
+
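+        // Emits FCMP (or FCMPE, which also signals on quiet NaNs) and copies the resulting
+        // NZCV bits (31..28) into the emulated FPSCR flags. When Opc bit 1 is set, the
+        // comparison is against #0.0.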
+        public static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
+        {
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+            bool cmpWithZero = (op.Opc & 2) != 0;
+
+            Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+
+            bool doubleSize = (op.Size & 1) != 0;
+            int shift = doubleSize ? 1 : 2;
+            Operand n = GetVecA32(op.Vd >> shift);
+            Operand m = GetVecA32(op.Vm >> shift);
+
+            n = EmitExtractScalar(context, n, op.Vd, doubleSize);
+            m = cmpWithZero ? Const(0) : EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+            Operand nzcv = context.AddIntrinsicInt(inst, n, m);
+
+            Operand one = Const(1);
+
+            SetFpFlag(context, FPState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
+            SetFpFlag(context, FPState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
+            SetFpFlag(context, FPState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
+            SetFpFlag(context, FPState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
+        }
+
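+        // Vector FP compare: picks the FCM* intrinsic for the condition, using the *Vz
+        // zero-comparing forms when 'zero' is set, then emits it as a unary or binary
+        // vector operation.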
+        public static void EmitCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            int sizeF = op.Size & 1;
+
+            Intrinsic inst;
+            if (zero)
+            {
+                inst = cond switch
+                {
+                    CmpCondition.Equal => Intrinsic.Arm64FcmeqVz,
+                    CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtVz,
+                    CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeVz,
+                    CmpCondition.LessThan => Intrinsic.Arm64FcmltVz,
+                    CmpCondition.LessThanOrEqual => Intrinsic.Arm64FcmleVz,
+                    _ => throw new InvalidOperationException()
+                };
+            }
+            else
+            {
+                inst = cond switch
+                {
+                    CmpCondition.Equal => Intrinsic.Arm64FcmeqV,
+                    CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtV,
+                    CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeV,
+                    _ => throw new InvalidOperationException()
+                };
+            }
+
+            inst |= (sizeF != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+
+            if (zero)
+            {
+                EmitVectorUnaryOpSimd32(context, (m) =>
+                {
+                    return context.AddIntrinsic(inst, m);
+                });
+            }
+            else
+            {
+                EmitVectorBinaryOpSimd32(context, (n, m) =>
+                {
+                    return context.AddIntrinsic(inst, n, m);
+                });
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs b/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs
new file mode 100644
index 0000000000..f0d242ae29
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs
@@ -0,0 +1,720 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+    static class InstEmitSimdHelperArm64
+    {
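+        // Shared emitters for translating AArch64 SIMD instructions directly to host
+        // AdvSimd intrinsics. FP helpers OR in Arm64VDouble when the size bit selects
+        // double precision; integer helpers encode the element size via Arm64VSizeShift;
+        // Arm64V128 marks full-width (128-bit) vector operations.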
+        public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+        }
+
+        public static void EmitScalarUnaryOpFFromGp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand n = GetIntOrZR(context, op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+        }
+
+        public static void EmitScalarUnaryOpFToGp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
+                ? context.AddIntrinsicInt (inst, n)
+                : context.AddIntrinsicLong(inst, n));
+        }
+
+        public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitScalarBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+        }
+
+        public static void EmitScalarTernaryOpF(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+            Operand a = GetVec(op.Ra);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, a, n, m));
+        }
+
+        public static void EmitScalarTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+        }
+
+        public static void EmitScalarUnaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+        }
+
+        public static void EmitScalarBinaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitScalarBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+        }
+
+        public static void EmitScalarTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+        }
+
+        public static void EmitScalarShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
+        }
+
+        public static void EmitScalarShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+        }
+
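+        // The saturating helpers below can set the host's cumulative saturation (QC) flag,
+        // so they request a pending sync of the guest FPSR after emitting the operation.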
+        public static void EmitScalarSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitScalarSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            Operand result = context.AddIntrinsic(inst, n);
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitScalarSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            Operand result = context.AddIntrinsic(inst, n, m);
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitScalarSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            Operand result = context.AddIntrinsic(inst, d, n);
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitScalarConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+        }
+
+        public static void EmitScalarConvertBinaryOpFFromGp(ArmEmitterContext context, Intrinsic inst, int fBits)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetIntOrZR(context, op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+        }
+
+        public static void EmitScalarConvertBinaryOpFToGp(ArmEmitterContext context, Intrinsic inst, int fBits)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
+                ? context.AddIntrinsicInt (inst, n, Const(fBits))
+                : context.AddIntrinsicLong(inst, n, Const(fBits)));
+        }
+
+        public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+        }
+
+        public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitVectorBinaryOpFRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+        }
+
+        public static void EmitVectorBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+        }
+
+        public static void EmitVectorTernaryOpFRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+        }
+
+        public static void EmitVectorTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+        }
+
+        public static void EmitVectorUnaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+        }
+
+        public static void EmitVectorBinaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitVectorBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n));
+        }
+
+        public static void EmitVectorBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+        }
+
+        public static void EmitVectorTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+        }
+
+        public static void EmitVectorTernaryOpRdByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+        }
+
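+        // The shift helpers pass the immediate shift amount as a trailing constant operand.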
+        public static void EmitVectorShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
+        }
+
+        public static void EmitVectorShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, Const(shift)));
+        }
+
+        public static void EmitVectorSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, Const(shift)));
+
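+            // Saturating instructions may set the host QC flag; mark a pending sync so the
+            // guest FPSR picks it up before the flag is next read.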
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitVectorSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            Operand result = context.AddIntrinsic(inst, n);
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitVectorSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            Operand result = context.AddIntrinsic(inst, n, m);
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitVectorSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            Operand result = context.AddIntrinsic(inst, d, n);
+
+            context.Copy(d, result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitVectorSaturatingBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            Operand result = context.AddIntrinsic(inst, n, m, Const(op.Index));
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitVectorConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+        }
+
+        public static void EmitVectorLookupTable(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+
+            Operand[] operands = new Operand[op.Size + 1];
+
+            operands[op.Size] = GetVec(op.Rm);
+
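+            // Table registers are consecutive starting at Rn and wrap around modulo 32.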
+            for (int index = 0; index < op.Size; index++)
+            {
+                operands[index] = GetVec((op.Rn + index) & 0x1F);
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, operands));
+        }
+
+        public static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            bool cmpWithZero = !(op is OpCodeSimdFcond) && op.Bit3;
+
+            Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            Operand n = GetVec(op.Rn);
+            Operand m = cmpWithZero ? Const(0) : GetVec(op.Rm);
+
+            Operand nzcv = context.AddIntrinsicInt(inst, n, m);
+
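+            // The intrinsic returns NZCV packed in bits 31:28; unpack each bit into guest PState.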
+            Operand one = Const(1);
+
+            SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
+            SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
+            SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
+            SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
index 624ae841d3..8ca815801a 100644
--- a/ARMeilleure/Instructions/InstEmitSimdLogical.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
@@ -14,7 +14,11 @@ namespace ARMeilleure.Instructions
     {
         public static void And_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
 
         public static void Bic_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -98,12 +106,26 @@ namespace ARMeilleure.Instructions
 
         public static void Bif_V(ArmEmitterContext context)
         {
-            EmitBifBit(context, notRm: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV);
+            }
+            else
+            {
+                EmitBifBit(context, notRm: true);
+            }
         }
 
         public static void Bit_V(ArmEmitterContext context)
         {
-            EmitBifBit(context, notRm: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV);
+            }
+            else
+            {
+                EmitBifBit(context, notRm: false);
+            }
         }
 
         private static void EmitBifBit(ArmEmitterContext context, bool notRm)
@@ -167,7 +189,11 @@ namespace ARMeilleure.Instructions
 
         public static void Bsl_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -200,7 +226,11 @@ namespace ARMeilleure.Instructions
 
         public static void Eor_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -249,7 +279,11 @@ namespace ARMeilleure.Instructions
 
         public static void Orn_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -280,7 +314,11 @@ namespace ARMeilleure.Instructions
 
         public static void Orr_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
index dd686d4dd4..c2a04778bb 100644
--- a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
@@ -13,7 +13,11 @@ namespace ARMeilleure.Instructions
     {
         public static void Vand_I(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64AndV | Intrinsic.Arm64V128, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pand, n, m));
             }
@@ -25,7 +29,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vbic_I(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64BicV | Intrinsic.Arm64V128, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pandn, m, n));
             }
@@ -73,17 +81,35 @@ namespace ARMeilleure.Instructions
 
         public static void Vbif(ArmEmitterContext context)
         {
-            EmitBifBit(context, true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BifV | Intrinsic.Arm64V128, d, n, m));
+            }
+            else
+            {
+                EmitBifBit(context, true);
+            }
         }
 
         public static void Vbit(ArmEmitterContext context)
         {
-            EmitBifBit(context, false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BitV | Intrinsic.Arm64V128, d, n, m));
+            }
+            else
+            {
+                EmitBifBit(context, false);
+            }
         }
 
         public static void Vbsl(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BslV | Intrinsic.Arm64V128, d, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorTernaryOpSimd32(context, (d, n, m) =>
                 {
@@ -105,7 +131,11 @@ namespace ARMeilleure.Instructions
 
         public static void Veor_I(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64EorV | Intrinsic.Arm64V128, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pxor, n, m));
             }
@@ -117,7 +147,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vorn_I(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 Operand mask = context.VectorOne();
 
@@ -135,7 +169,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vorr_I(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrrV | Intrinsic.Arm64V128, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Por, n, m));
             }
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
index 7da180fc9c..17100eb9c8 100644
--- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@@ -392,7 +392,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
 
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Zip1V, Intrinsic.Arm64Zip2V);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorShuffleOpSimd32(context, (m, d) =>
                 {
@@ -461,7 +465,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
 
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Uzp1V, Intrinsic.Arm64Uzp2V);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitVectorShuffleOpSimd32(context, (m, d) =>
                 {
@@ -559,6 +567,52 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        private static void EmitVectorZipUzpOpSimd32(ArmEmitterContext context, Intrinsic inst1, Intrinsic inst2)
+        {
+            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
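+            // When source and destination overlap (Qm == Qd), both result halves are merged
+            // back into the destination register below.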
+            bool overlap = op.Qm == op.Qd;
+
+            Operand d = GetVecA32(op.Qd);
+            Operand m = GetVecA32(op.Qm);
+
+            Operand dPart = d;
+            Operand mPart = m;
+
+            if (!op.Q) // Register swap: move relevant doubleword to destination side.
+            {
+                dPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
+                mPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
+            }
+
+            Intrinsic vSize = op.Q ? Intrinsic.Arm64V128 : Intrinsic.Arm64V64;
+
+            vSize |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            Operand resD = context.AddIntrinsic(inst1 | vSize, dPart, mPart);
+            Operand resM = context.AddIntrinsic(inst2 | vSize, dPart, mPart);
+
+            if (!op.Q) // Register insert.
+            {
+                resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, d, Const(op.Vd & 1), resD, Const(0));
+
+                if (overlap)
+                {
+                    resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, resD, Const(op.Vm & 1), resM, Const(0));
+                }
+                else
+                {
+                    resM = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, m, Const(op.Vm & 1), resM, Const(0));
+                }
+            }
+
+            context.Copy(d, resD);
+            if (!overlap)
+            {
+                context.Copy(m, resM);
+            }
+        }
+
         private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
         {
             OpCode32Simd op = (OpCode32Simd)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs
index cf3b51bd6b..19e41119be 100644
--- a/ARMeilleure/Instructions/InstEmitSimdShift.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs
@@ -26,7 +26,15 @@ namespace ARMeilleure.Instructions
 
         public static void Rshrn_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64RshrnV, shift);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
@@ -80,7 +88,14 @@ namespace ARMeilleure.Instructions
 
             int shift = GetImmShl(op);
 
-            EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64ShlS, shift);
+            }
+            else
+            {
+                EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+            }
         }
 
         public static void Shl_V(ArmEmitterContext context)
@@ -90,7 +105,11 @@ namespace ARMeilleure.Instructions
             int shift = GetImmShl(op);
             int eSize = 8 << op.Size;
 
-            if (shift >= eSize)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64ShlV, shift);
+            }
+            else if (shift >= eSize)
             {
                 if ((op.RegisterSize == RegisterSize.Simd64))
                 {
@@ -143,7 +162,11 @@ namespace ARMeilleure.Instructions
 
             int shift = 8 << op.Size;
 
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ShllV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -170,7 +193,15 @@ namespace ARMeilleure.Instructions
 
         public static void Shrn_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64ShrnV, shift);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
@@ -205,89 +236,259 @@ namespace ARMeilleure.Instructions
 
         public static void Sli_S(ArmEmitterContext context)
         {
-            EmitSli(context, scalar: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShl(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SliS, shift);
+            }
+            else
+            {
+                EmitSli(context, scalar: true);
+            }
         }
 
         public static void Sli_V(ArmEmitterContext context)
         {
-            EmitSli(context, scalar: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShl(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SliV, shift);
+            }
+            else
+            {
+                EmitSli(context, scalar: false);
+            }
         }
 
         public static void Sqrshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
+            }
         }
 
         public static void Sqrshrn_S(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnS, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+            }
         }
 
         public static void Sqrshrn_V(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnV, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+            }
         }
 
         public static void Sqrshrun_S(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunS, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+            }
         }
 
         public static void Sqrshrun_V(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunV, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+            }
         }
 
         public static void Sqshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
+            }
         }
 
         public static void Sqshrn_S(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnS, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+            }
         }
 
         public static void Sqshrn_V(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnV, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+            }
         }
 
         public static void Sqshrun_S(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunS, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+            }
         }
 
         public static void Sqshrun_V(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunV, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+            }
         }
 
         public static void Sri_S(ArmEmitterContext context)
         {
-            EmitSri(context, scalar: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SriS, shift);
+            }
+            else
+            {
+                EmitSri(context, scalar: true);
+            }
         }
 
         public static void Sri_V(ArmEmitterContext context)
         {
-            EmitSri(context, scalar: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SriV, shift);
+            }
+            else
+            {
+                EmitSri(context, scalar: false);
+            }
         }
 
         public static void Srshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
+            }
         }
 
         public static void Srshr_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SrshrS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+            }
         }
 
         public static void Srshr_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SrshrV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
             {
                 int shift = GetImmShr(op);
                 int eSize = 8 << op.Size;
@@ -325,14 +526,31 @@ namespace ARMeilleure.Instructions
 
         public static void Srsra_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SrsraS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            }
         }
 
         public static void Srsra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SrsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
             {
                 int shift = GetImmShr(op);
                 int eSize = 8 << op.Size;
@@ -372,12 +590,26 @@ namespace ARMeilleure.Instructions
 
         public static void Sshl_S(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SshlS);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
+            }
         }
 
         public static void Sshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Signed);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Signed);
+            }
         }
 
         public static void Sshll_V(ArmEmitterContext context)
@@ -386,7 +618,11 @@ namespace ARMeilleure.Instructions
 
             int shift = GetImmShl(op);
 
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshllV, shift);
+            }
+            else if (Optimizations.UseSse41)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -416,7 +652,18 @@ namespace ARMeilleure.Instructions
 
         public static void Sshr_S(ArmEmitterContext context)
         {
-            EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SshrS, shift);
+            }
+            else
+            {
+                EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+            }
         }
 
         public static void Sshr_V(ArmEmitterContext context)
@@ -425,7 +672,11 @@ namespace ARMeilleure.Instructions
 
             int shift = GetImmShr(op);
 
-            if (Optimizations.UseGfni && op.Size == 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshrV, shift);
+            }
+            else if (Optimizations.UseGfni && op.Size == 0)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -478,14 +729,31 @@ namespace ARMeilleure.Instructions
 
         public static void Ssra_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SsraS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+            }
         }
 
         public static void Ssra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
             {
                 int shift = GetImmShr(op);
 
@@ -515,49 +783,131 @@ namespace ARMeilleure.Instructions
 
         public static void Uqrshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqrshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
+            }
         }
 
         public static void Uqrshrn_S(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnS, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            }
         }
 
         public static void Uqrshrn_V(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnV, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            }
         }
 
         public static void Uqshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Saturating);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Saturating);
+            }
         }
 
         public static void Uqshrn_S(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnS, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            }
         }
 
         public static void Uqshrn_V(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnV, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            }
         }
 
         public static void Urshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Round);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Round);
+            }
         }
 
         public static void Urshr_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UrshrS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+            }
         }
 
         public static void Urshr_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UrshrV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
                 int eSize = 8 << op.Size;
@@ -593,14 +943,31 @@ namespace ARMeilleure.Instructions
 
         public static void Ursra_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UrsraS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            }
         }
 
         public static void Ursra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UrsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
                 int eSize = 8 << op.Size;
@@ -638,12 +1005,26 @@ namespace ARMeilleure.Instructions
 
         public static void Ushl_S(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Scalar);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64UshlS);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Scalar);
+            }
         }
 
         public static void Ushl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.None);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.None);
+            }
         }
 
         public static void Ushll_V(ArmEmitterContext context)
@@ -652,7 +1033,11 @@ namespace ARMeilleure.Instructions
 
             int shift = GetImmShl(op);
 
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshllV, shift);
+            }
+            else if (Optimizations.UseSse41)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -682,14 +1067,31 @@ namespace ARMeilleure.Instructions
 
         public static void Ushr_S(ArmEmitterContext context)
         {
-            EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UshrS, shift);
+            }
+            else
+            {
+                EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+            }
         }
 
         public static void Ushr_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
 
@@ -714,14 +1116,31 @@ namespace ARMeilleure.Instructions
 
         public static void Usra_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UsraS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+            }
         }
 
         public static void Usra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
 
diff --git a/ARMeilleure/Instructions/InstEmitSystem.cs b/ARMeilleure/Instructions/InstEmitSystem.cs
index cc32228c3d..1345bbf109 100644
--- a/ARMeilleure/Instructions/InstEmitSystem.cs
+++ b/ARMeilleure/Instructions/InstEmitSystem.cs
@@ -150,6 +150,8 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSystem op = (OpCodeSystem)context.CurrOp;
 
+            // Sync any pending host QC (saturation) state into the guest FPSR before reading it.
+            context.SyncQcFlag();
+
             Operand fpsr = Const(0);
 
             for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
@@ -196,6 +198,8 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSystem op = (OpCodeSystem)context.CurrOp;
 
+            // The guest is about to overwrite FPSR, so drop any modified host QC state first.
+            context.ClearQcFlagIfModified();
+
             Operand fpsr = GetIntOrZR(context, op.Rt);
                     fpsr = context.ConvertI64ToI32(fpsr);
 
diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
index bc1285be27..a665e4b7a1 100644
--- a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
+++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
@@ -2,6 +2,8 @@ namespace ARMeilleure.IntermediateRepresentation
 {
     enum Intrinsic : ushort
     {
+        // X86 (SSE and AVX)
+
         X86Addpd,
         X86Addps,
         X86Addsd,
@@ -172,6 +174,458 @@ namespace ARMeilleure.IntermediateRepresentation
         X86Vfnmsub231sd,
         X86Vfnmsub231ss,
         X86Xorpd,
-        X86Xorps
+        X86Xorps,
+
+        // Arm64 (FP and Advanced SIMD)
+
+        Arm64AbsS,
+        Arm64AbsV,
+        Arm64AddhnV,
+        Arm64AddpS,
+        Arm64AddpV,
+        Arm64AddvV,
+        Arm64AddS,
+        Arm64AddV,
+        Arm64AesdV,
+        Arm64AeseV,
+        Arm64AesimcV,
+        Arm64AesmcV,
+        Arm64AndV,
+        Arm64BicVi,
+        Arm64BicV,
+        Arm64BifV,
+        Arm64BitV,
+        Arm64BslV,
+        Arm64ClsV,
+        Arm64ClzV,
+        Arm64CmeqS,
+        Arm64CmeqV,
+        Arm64CmeqSz,
+        Arm64CmeqVz,
+        Arm64CmgeS,
+        Arm64CmgeV,
+        Arm64CmgeSz,
+        Arm64CmgeVz,
+        Arm64CmgtS,
+        Arm64CmgtV,
+        Arm64CmgtSz,
+        Arm64CmgtVz,
+        Arm64CmhiS,
+        Arm64CmhiV,
+        Arm64CmhsS,
+        Arm64CmhsV,
+        Arm64CmleSz,
+        Arm64CmleVz,
+        Arm64CmltSz,
+        Arm64CmltVz,
+        Arm64CmtstS,
+        Arm64CmtstV,
+        Arm64CntV,
+        Arm64DupSe,
+        Arm64DupVe,
+        Arm64DupGp,
+        Arm64EorV,
+        Arm64ExtV,
+        Arm64FabdS,
+        Arm64FabdV,
+        Arm64FabsV,
+        Arm64FabsS,
+        Arm64FacgeS,
+        Arm64FacgeV,
+        Arm64FacgtS,
+        Arm64FacgtV,
+        Arm64FaddpS,
+        Arm64FaddpV,
+        Arm64FaddV,
+        Arm64FaddS,
+        Arm64FccmpeS,
+        Arm64FccmpS,
+        Arm64FcmeqS,
+        Arm64FcmeqV,
+        Arm64FcmeqSz,
+        Arm64FcmeqVz,
+        Arm64FcmgeS,
+        Arm64FcmgeV,
+        Arm64FcmgeSz,
+        Arm64FcmgeVz,
+        Arm64FcmgtS,
+        Arm64FcmgtV,
+        Arm64FcmgtSz,
+        Arm64FcmgtVz,
+        Arm64FcmleSz,
+        Arm64FcmleVz,
+        Arm64FcmltSz,
+        Arm64FcmltVz,
+        Arm64FcmpeS,
+        Arm64FcmpS,
+        Arm64FcselS,
+        Arm64FcvtasS,
+        Arm64FcvtasV,
+        Arm64FcvtasGp,
+        Arm64FcvtauS,
+        Arm64FcvtauV,
+        Arm64FcvtauGp,
+        Arm64FcvtlV,
+        Arm64FcvtmsS,
+        Arm64FcvtmsV,
+        Arm64FcvtmsGp,
+        Arm64FcvtmuS,
+        Arm64FcvtmuV,
+        Arm64FcvtmuGp,
+        Arm64FcvtnsS,
+        Arm64FcvtnsV,
+        Arm64FcvtnsGp,
+        Arm64FcvtnuS,
+        Arm64FcvtnuV,
+        Arm64FcvtnuGp,
+        Arm64FcvtnV,
+        Arm64FcvtpsS,
+        Arm64FcvtpsV,
+        Arm64FcvtpsGp,
+        Arm64FcvtpuS,
+        Arm64FcvtpuV,
+        Arm64FcvtpuGp,
+        Arm64FcvtxnS,
+        Arm64FcvtxnV,
+        Arm64FcvtzsSFixed,
+        Arm64FcvtzsVFixed,
+        Arm64FcvtzsS,
+        Arm64FcvtzsV,
+        Arm64FcvtzsGpFixed,
+        Arm64FcvtzsGp,
+        Arm64FcvtzuSFixed,
+        Arm64FcvtzuVFixed,
+        Arm64FcvtzuS,
+        Arm64FcvtzuV,
+        Arm64FcvtzuGpFixed,
+        Arm64FcvtzuGp,
+        Arm64FcvtS,
+        Arm64FdivV,
+        Arm64FdivS,
+        Arm64FmaddS,
+        Arm64FmaxnmpS,
+        Arm64FmaxnmpV,
+        Arm64FmaxnmvV,
+        Arm64FmaxnmV,
+        Arm64FmaxnmS,
+        Arm64FmaxpS,
+        Arm64FmaxpV,
+        Arm64FmaxvV,
+        Arm64FmaxV,
+        Arm64FmaxS,
+        Arm64FminnmpS,
+        Arm64FminnmpV,
+        Arm64FminnmvV,
+        Arm64FminnmV,
+        Arm64FminnmS,
+        Arm64FminpS,
+        Arm64FminpV,
+        Arm64FminvV,
+        Arm64FminV,
+        Arm64FminS,
+        Arm64FmlaSe,
+        Arm64FmlaVe,
+        Arm64FmlaV,
+        Arm64FmlsSe,
+        Arm64FmlsVe,
+        Arm64FmlsV,
+        Arm64FmovVi,
+        Arm64FmovS,
+        Arm64FmovGp,
+        Arm64FmovSi,
+        Arm64FmsubS,
+        Arm64FmulxSe,
+        Arm64FmulxVe,
+        Arm64FmulxS,
+        Arm64FmulxV,
+        Arm64FmulSe,
+        Arm64FmulVe,
+        Arm64FmulV,
+        Arm64FmulS,
+        Arm64FnegV,
+        Arm64FnegS,
+        Arm64FnmaddS,
+        Arm64FnmsubS,
+        Arm64FnmulS,
+        Arm64FrecpeS,
+        Arm64FrecpeV,
+        Arm64FrecpsS,
+        Arm64FrecpsV,
+        Arm64FrecpxS,
+        Arm64FrintaV,
+        Arm64FrintaS,
+        Arm64FrintiV,
+        Arm64FrintiS,
+        Arm64FrintmV,
+        Arm64FrintmS,
+        Arm64FrintnV,
+        Arm64FrintnS,
+        Arm64FrintpV,
+        Arm64FrintpS,
+        Arm64FrintxV,
+        Arm64FrintxS,
+        Arm64FrintzV,
+        Arm64FrintzS,
+        Arm64FrsqrteS,
+        Arm64FrsqrteV,
+        Arm64FrsqrtsS,
+        Arm64FrsqrtsV,
+        Arm64FsqrtV,
+        Arm64FsqrtS,
+        Arm64FsubV,
+        Arm64FsubS,
+        Arm64InsVe,
+        Arm64InsGp,
+        Arm64Ld1rV,
+        Arm64Ld1Vms,
+        Arm64Ld1Vss,
+        Arm64Ld2rV,
+        Arm64Ld2Vms,
+        Arm64Ld2Vss,
+        Arm64Ld3rV,
+        Arm64Ld3Vms,
+        Arm64Ld3Vss,
+        Arm64Ld4rV,
+        Arm64Ld4Vms,
+        Arm64Ld4Vss,
+        Arm64MlaVe,
+        Arm64MlaV,
+        Arm64MlsVe,
+        Arm64MlsV,
+        Arm64MoviV,
+        Arm64MrsFpsr,
+        Arm64MsrFpsr,
+        Arm64MulVe,
+        Arm64MulV,
+        Arm64MvniV,
+        Arm64NegS,
+        Arm64NegV,
+        Arm64NotV,
+        Arm64OrnV,
+        Arm64OrrVi,
+        Arm64OrrV,
+        Arm64PmullV,
+        Arm64PmulV,
+        Arm64RaddhnV,
+        Arm64RbitV,
+        Arm64Rev16V,
+        Arm64Rev32V,
+        Arm64Rev64V,
+        Arm64RshrnV,
+        Arm64RsubhnV,
+        Arm64SabalV,
+        Arm64SabaV,
+        Arm64SabdlV,
+        Arm64SabdV,
+        Arm64SadalpV,
+        Arm64SaddlpV,
+        Arm64SaddlvV,
+        Arm64SaddlV,
+        Arm64SaddwV,
+        Arm64ScvtfSFixed,
+        Arm64ScvtfVFixed,
+        Arm64ScvtfS,
+        Arm64ScvtfV,
+        Arm64ScvtfGpFixed,
+        Arm64ScvtfGp,
+        Arm64Sha1cV,
+        Arm64Sha1hV,
+        Arm64Sha1mV,
+        Arm64Sha1pV,
+        Arm64Sha1su0V,
+        Arm64Sha1su1V,
+        Arm64Sha256h2V,
+        Arm64Sha256hV,
+        Arm64Sha256su0V,
+        Arm64Sha256su1V,
+        Arm64ShaddV,
+        Arm64ShllV,
+        Arm64ShlS,
+        Arm64ShlV,
+        Arm64ShrnV,
+        Arm64ShsubV,
+        Arm64SliS,
+        Arm64SliV,
+        Arm64SmaxpV,
+        Arm64SmaxvV,
+        Arm64SmaxV,
+        Arm64SminpV,
+        Arm64SminvV,
+        Arm64SminV,
+        Arm64SmlalVe,
+        Arm64SmlalV,
+        Arm64SmlslVe,
+        Arm64SmlslV,
+        Arm64SmovV,
+        Arm64SmullVe,
+        Arm64SmullV,
+        Arm64SqabsS,
+        Arm64SqabsV,
+        Arm64SqaddS,
+        Arm64SqaddV,
+        Arm64SqdmlalSe,
+        Arm64SqdmlalVe,
+        Arm64SqdmlalS,
+        Arm64SqdmlalV,
+        Arm64SqdmlslSe,
+        Arm64SqdmlslVe,
+        Arm64SqdmlslS,
+        Arm64SqdmlslV,
+        Arm64SqdmulhSe,
+        Arm64SqdmulhVe,
+        Arm64SqdmulhS,
+        Arm64SqdmulhV,
+        Arm64SqdmullSe,
+        Arm64SqdmullVe,
+        Arm64SqdmullS,
+        Arm64SqdmullV,
+        Arm64SqnegS,
+        Arm64SqnegV,
+        Arm64SqrdmulhSe,
+        Arm64SqrdmulhVe,
+        Arm64SqrdmulhS,
+        Arm64SqrdmulhV,
+        Arm64SqrshlS,
+        Arm64SqrshlV,
+        Arm64SqrshrnS,
+        Arm64SqrshrnV,
+        Arm64SqrshrunS,
+        Arm64SqrshrunV,
+        Arm64SqshluS,
+        Arm64SqshluV,
+        Arm64SqshlSi,
+        Arm64SqshlVi,
+        Arm64SqshlS,
+        Arm64SqshlV,
+        Arm64SqshrnS,
+        Arm64SqshrnV,
+        Arm64SqshrunS,
+        Arm64SqshrunV,
+        Arm64SqsubS,
+        Arm64SqsubV,
+        Arm64SqxtnS,
+        Arm64SqxtnV,
+        Arm64SqxtunS,
+        Arm64SqxtunV,
+        Arm64SrhaddV,
+        Arm64SriS,
+        Arm64SriV,
+        Arm64SrshlS,
+        Arm64SrshlV,
+        Arm64SrshrS,
+        Arm64SrshrV,
+        Arm64SrsraS,
+        Arm64SrsraV,
+        Arm64SshllV,
+        Arm64SshlS,
+        Arm64SshlV,
+        Arm64SshrS,
+        Arm64SshrV,
+        Arm64SsraS,
+        Arm64SsraV,
+        Arm64SsublV,
+        Arm64SsubwV,
+        Arm64St1Vms,
+        Arm64St1Vss,
+        Arm64St2Vms,
+        Arm64St2Vss,
+        Arm64St3Vms,
+        Arm64St3Vss,
+        Arm64St4Vms,
+        Arm64St4Vss,
+        Arm64SubhnV,
+        Arm64SubS,
+        Arm64SubV,
+        Arm64SuqaddS,
+        Arm64SuqaddV,
+        Arm64TblV,
+        Arm64TbxV,
+        Arm64Trn1V,
+        Arm64Trn2V,
+        Arm64UabalV,
+        Arm64UabaV,
+        Arm64UabdlV,
+        Arm64UabdV,
+        Arm64UadalpV,
+        Arm64UaddlpV,
+        Arm64UaddlvV,
+        Arm64UaddlV,
+        Arm64UaddwV,
+        Arm64UcvtfSFixed,
+        Arm64UcvtfVFixed,
+        Arm64UcvtfS,
+        Arm64UcvtfV,
+        Arm64UcvtfGpFixed,
+        Arm64UcvtfGp,
+        Arm64UhaddV,
+        Arm64UhsubV,
+        Arm64UmaxpV,
+        Arm64UmaxvV,
+        Arm64UmaxV,
+        Arm64UminpV,
+        Arm64UminvV,
+        Arm64UminV,
+        Arm64UmlalVe,
+        Arm64UmlalV,
+        Arm64UmlslVe,
+        Arm64UmlslV,
+        Arm64UmovV,
+        Arm64UmullVe,
+        Arm64UmullV,
+        Arm64UqaddS,
+        Arm64UqaddV,
+        Arm64UqrshlS,
+        Arm64UqrshlV,
+        Arm64UqrshrnS,
+        Arm64UqrshrnV,
+        Arm64UqshlSi,
+        Arm64UqshlVi,
+        Arm64UqshlS,
+        Arm64UqshlV,
+        Arm64UqshrnS,
+        Arm64UqshrnV,
+        Arm64UqsubS,
+        Arm64UqsubV,
+        Arm64UqxtnS,
+        Arm64UqxtnV,
+        Arm64UrecpeV,
+        Arm64UrhaddV,
+        Arm64UrshlS,
+        Arm64UrshlV,
+        Arm64UrshrS,
+        Arm64UrshrV,
+        Arm64UrsqrteV,
+        Arm64UrsraS,
+        Arm64UrsraV,
+        Arm64UshllV,
+        Arm64UshlS,
+        Arm64UshlV,
+        Arm64UshrS,
+        Arm64UshrV,
+        Arm64UsqaddS,
+        Arm64UsqaddV,
+        Arm64UsraS,
+        Arm64UsraV,
+        Arm64UsublV,
+        Arm64UsubwV,
+        Arm64Uzp1V,
+        Arm64Uzp2V,
+        Arm64XtnV,
+        Arm64Zip1V,
+        Arm64Zip2V,
+
+        Arm64VTypeShift = 13,
+        Arm64VTypeMask = 1 << Arm64VTypeShift,
+        Arm64V64 = 0 << Arm64VTypeShift,
+        Arm64V128 = 1 << Arm64VTypeShift,
+
+        Arm64VSizeShift = 14,
+        Arm64VSizeMask = 3 << Arm64VSizeShift,
+        Arm64VFloat = 0 << Arm64VSizeShift,
+        Arm64VDouble = 1 << Arm64VSizeShift,
+        Arm64VByte = 0 << Arm64VSizeShift,
+        Arm64VHWord = 1 << Arm64VSizeShift,
+        Arm64VWord = 2 << Arm64VSizeShift,
+        Arm64VDWord = 3 << Arm64VSizeShift
     }
 }
\ No newline at end of file
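
Note: the trailing members above are not opcodes. They are bit-fields OR'd into an
intrinsic value to select the vector arrangement: Arm64V64/Arm64V128 pick the register
width, and the size bits pick the element type (Arm64VFloat/Arm64VDouble share encodings
with Arm64VByte/Arm64VHWord, since an operation is either FP or integer). A minimal
sketch of the composition, using names from the enum; the exact pattern at the emitter
call sites is inferred from the masks:

    // 128-bit single-precision pairwise min:
    Intrinsic intrin = Intrinsic.Arm64FminpV | Intrinsic.Arm64V128 | Intrinsic.Arm64VFloat;

    // Recover the pieces with the masks:
    bool is128 = (intrin & Intrinsic.Arm64VTypeMask) == Intrinsic.Arm64V128;
    Intrinsic op = intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask);
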
diff --git a/ARMeilleure/IntermediateRepresentation/Multiplier.cs b/ARMeilleure/IntermediateRepresentation/Multiplier.cs
index 23582072b1..d6bc7d9947 100644
--- a/ARMeilleure/IntermediateRepresentation/Multiplier.cs
+++ b/ARMeilleure/IntermediateRepresentation/Multiplier.cs
@@ -5,6 +5,7 @@ namespace ARMeilleure.IntermediateRepresentation
         x1 = 0,
         x2 = 1,
         x4 = 2,
-        x8 = 3
+        x8 = 3,
+        x16 = 4
     }
 }
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Operand.cs b/ARMeilleure/IntermediateRepresentation/Operand.cs
index 896d3420c2..9e8de3ba43 100644
--- a/ARMeilleure/IntermediateRepresentation/Operand.cs
+++ b/ARMeilleure/IntermediateRepresentation/Operand.cs
@@ -259,6 +259,20 @@ namespace ARMeilleure.IntermediateRepresentation
             }
         }
 
+        public Span<Operation> GetUses(ref Span<Operation> buffer)
+        {
+            ReadOnlySpan<Operation> uses = Uses;
+
+            if (buffer.Length < uses.Length)
+            {
+                buffer = Allocators.Default.AllocateSpan<Operation>((uint)uses.Length);
+            }
+
+            uses.CopyTo(buffer);
+
+            return buffer.Slice(0, uses.Length);
+        }
+
         private static void New<T>(ref T* data, ref ushort count, ref ushort capacity, ushort initialCapacity) where T : unmanaged
         {
             count = 0;
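
Note: GetUses snapshots the use list into a caller-provided scratch buffer (grown from
Allocators.Default when too small), so a pass can walk the uses while rewriting them,
which mutates the live list. A sketch of the intended call pattern (variable names are
illustrative):

    Span<Operation> buffer = default;

    foreach (Operation use in local.GetUses(ref buffer))
    {
        // Rewriting this operation may add or remove entries in local's real
        // use list; the snapshot keeps the iteration stable.
    }
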
diff --git a/ARMeilleure/IntermediateRepresentation/OperandType.cs b/ARMeilleure/IntermediateRepresentation/OperandType.cs
index bfdf5130cf..81b22cf563 100644
--- a/ARMeilleure/IntermediateRepresentation/OperandType.cs
+++ b/ARMeilleure/IntermediateRepresentation/OperandType.cs
@@ -47,5 +47,19 @@ namespace ARMeilleure.IntermediateRepresentation
 
             throw new InvalidOperationException($"Invalid operand type \"{type}\".");
         }
+
+        public static int GetSizeInBytesLog2(this OperandType type)
+        {
+            switch (type)
+            {
+                case OperandType.FP32: return 2;
+                case OperandType.FP64: return 3;
+                case OperandType.I32:  return 2;
+                case OperandType.I64:  return 3;
+                case OperandType.V128: return 4;
+            }
+
+            throw new InvalidOperationException($"Invalid operand type \"{type}\".");
+        }
     }
 }
\ No newline at end of file
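
Note: GetSizeInBytesLog2 complements the existing GetSizeInBytes, and its values line up
with the Multiplier change above: a Multiplier's enum value is the log2 of its scale, so
the new x16 entry is exactly what a V128 element (log2 size 4) needs for scaled
addressing. An illustrative mapping (the helper name is hypothetical):

    // Hypothetical: pick an addressing-mode scale for an element of the given type.
    static Multiplier GetScale(OperandType type) => (Multiplier)type.GetSizeInBytesLog2();
    // OperandType.I64 -> Multiplier.x8, OperandType.V128 -> Multiplier.x16
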
diff --git a/ARMeilleure/Memory/ReservedRegion.cs b/ARMeilleure/Memory/ReservedRegion.cs
index d634910898..2197afad99 100644
--- a/ARMeilleure/Memory/ReservedRegion.cs
+++ b/ARMeilleure/Memory/ReservedRegion.cs
@@ -4,7 +4,7 @@ namespace ARMeilleure.Memory
 {
     class ReservedRegion
     {
-        private const int DefaultGranularity = 65536; // Mapping granularity in Windows.
+        public const int DefaultGranularity = 65536; // Mapping granularity in Windows.
 
         public IJitMemoryBlock Block { get; }
 
diff --git a/ARMeilleure/Native/JitSupportDarwin.cs b/ARMeilleure/Native/JitSupportDarwin.cs
new file mode 100644
index 0000000000..7d6a8634a9
--- /dev/null
+++ b/ARMeilleure/Native/JitSupportDarwin.cs
@@ -0,0 +1,13 @@
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.Versioning;
+
+namespace ARMeilleure.Native
+{
+    [SupportedOSPlatform("macos")]
+    public static partial class JitSupportDarwin
+    {
+        [LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")]
+        public static partial void Copy(IntPtr dst, IntPtr src, ulong n);
+    }
+}
diff --git a/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib b/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib
new file mode 100644
index 0000000000..c65b0a4efb
Binary files /dev/null and b/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib differ
diff --git a/ARMeilleure/Native/macos_jit_support/Makefile b/ARMeilleure/Native/macos_jit_support/Makefile
new file mode 100644
index 0000000000..d6da35d52b
--- /dev/null
+++ b/ARMeilleure/Native/macos_jit_support/Makefile
@@ -0,0 +1,8 @@
+NAME = libarmeilleure-jitsupport.dylib
+
+all: ${NAME}
+
+${NAME}:
+	clang -O3 -dynamiclib support.c -o ${NAME}
+clean:
+	rm -f ${NAME}
diff --git a/ARMeilleure/Native/macos_jit_support/support.c b/ARMeilleure/Native/macos_jit_support/support.c
new file mode 100644
index 0000000000..1b13d90668
--- /dev/null
+++ b/ARMeilleure/Native/macos_jit_support/support.c
@@ -0,0 +1,14 @@
+#include <stddef.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <libkern/OSCacheControl.h>
+
+void armeilleure_jit_memcpy(void *dst, const void *src, size_t n) {
+    pthread_jit_write_protect_np(0);
+    memcpy(dst, src, n);
+    pthread_jit_write_protect_np(1);
+
+    // Ensure that the instruction cache for this range is invalidated.
+    sys_icache_invalidate(dst, n);
+}
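
Note: on Apple Silicon, MAP_JIT memory is write-protected per thread. A thread must call
pthread_jit_write_protect_np(0) before writing, flip protection back before the code can
run, and invalidate the instruction cache for the written range. The helper bundles all
three steps into one native call, so the toggle and the memcpy are guaranteed to happen
on the same thread; the managed side just pins the source and calls it, as the
JitCache.cs change below does:

    fixed (byte* codePtr = code)
    {
        JitSupportDarwin.Copy(funcPtr, (IntPtr)codePtr, (ulong)code.Length);
    }
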
diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs
index 97defd9a95..0810d96c91 100644
--- a/ARMeilleure/Optimizations.cs
+++ b/ARMeilleure/Optimizations.cs
@@ -1,4 +1,5 @@
 using ARMeilleure.CodeGen.X86;
+using System.Runtime.Intrinsics.Arm;
 
 namespace ARMeilleure
 {
@@ -9,6 +10,8 @@ namespace ARMeilleure
         public static bool AllowLcqInFunctionTable  { get; set; } = true;
         public static bool UseUnmanagedDispatchLoop { get; set; } = true;
 
+        public static bool UseAdvSimdIfAvailable { get; set; } = true;
+
         public static bool UseSseIfAvailable       { get; set; } = true;
         public static bool UseSse2IfAvailable      { get; set; } = true;
         public static bool UseSse3IfAvailable      { get; set; } = true;
@@ -30,6 +33,8 @@ namespace ARMeilleure
             set => HardwareCapabilities.ForceLegacySse = value;
         }
 
+        internal static bool UseAdvSimd => UseAdvSimdIfAvailable && AdvSimd.IsSupported;
+
         internal static bool UseSse       => UseSseIfAvailable       && HardwareCapabilities.SupportsSse;
         internal static bool UseSse2      => UseSse2IfAvailable      && HardwareCapabilities.SupportsSse2;
         internal static bool UseSse3      => UseSse3IfAvailable      && HardwareCapabilities.SupportsSse3;
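
Note: UseAdvSimd follows the same shape as the UseSse* switches: an opt-out property
combined with a hardware check (AdvSimd.IsSupported is only true on Arm64 hosts). An
illustrative sketch of how an instruction emitter can gate on it; the shape is an
assumption rather than a quote from the emitters, and X86Minps is the pre-existing x86
intrinsic:

    if (Optimizations.UseAdvSimd)
    {
        res = context.AddIntrinsic(Intrinsic.Arm64FminV | Intrinsic.Arm64V128 | Intrinsic.Arm64VFloat, n, m);
    }
    else if (Optimizations.UseSse2)
    {
        res = context.AddIntrinsic(Intrinsic.X86Minps, n, m);
    }
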
diff --git a/ARMeilleure/Signal/NativeSignalHandler.cs b/ARMeilleure/Signal/NativeSignalHandler.cs
index 0257f44039..da02f76a88 100644
--- a/ARMeilleure/Signal/NativeSignalHandler.cs
+++ b/ARMeilleure/Signal/NativeSignalHandler.cs
@@ -1,5 +1,7 @@
 using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
 using ARMeilleure.Translation;
+using ARMeilleure.Translation.Cache;
 using System;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
@@ -69,8 +71,8 @@ namespace ARMeilleure.Signal
 
         private const uint EXCEPTION_ACCESS_VIOLATION = 0xc0000005;
 
-        private const ulong PageSize = 0x1000;
-        private const ulong PageMask = PageSize - 1;
+        private static ulong _pageSize = GetPageSize();
+        private static ulong _pageMask = _pageSize - 1;
 
         private static IntPtr _handlerConfig;
         private static IntPtr _signalHandlerPtr;
@@ -79,6 +81,19 @@ namespace ARMeilleure.Signal
         private static readonly object _lock = new object();
         private static bool _initialized;
 
+        private static ulong GetPageSize()
+        {
+            // TODO: This needs to be based on the current memory manager configuration.
+            if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+            {
+                return 1UL << 14;
+            }
+            else
+            {
+                return 1UL << 12;
+            }
+        }
+
         static NativeSignalHandler()
         {
             _handlerConfig = Marshal.AllocHGlobal(Unsafe.SizeOf<SignalHandlerConfig>());
@@ -87,7 +102,12 @@ namespace ARMeilleure.Signal
             config = new SignalHandlerConfig();
         }
 
-        public static void InitializeSignalHandler()
+        public static void InitializeJitCache(IJitMemoryAllocator allocator)
+        {
+            JitCache.Initialize(allocator);
+        }
+
+        public static void InitializeSignalHandler(Func<IntPtr, IntPtr, IntPtr> customSignalHandlerFactory = null)
         {
             if (_initialized) return;
 
@@ -95,10 +115,9 @@ namespace ARMeilleure.Signal
             {
                 if (_initialized) return;
 
-                bool unix = OperatingSystem.IsLinux() || OperatingSystem.IsMacOS();
                 ref SignalHandlerConfig config = ref GetConfigRef();
 
-                if (unix)
+                if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS())
                 {
                     // Unix siginfo struct locations.
                     // NOTE: These are incredibly likely to be different between kernel version and architectures.
@@ -108,7 +127,13 @@ namespace ARMeilleure.Signal
 
                     _signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateUnixSignalHandler(_handlerConfig));
 
-                    SigAction old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
+                    if (customSignalHandlerFactory != null)
+                    {
+                        _signalHandlerPtr = customSignalHandlerFactory(UnixSignalHandlerRegistration.GetSegfaultExceptionHandler().sa_handler, _signalHandlerPtr);
+                    }
+
+                    var old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
+
                     config.UnixOldSigaction = (nuint)(ulong)old.sa_handler;
                     config.UnixOldSigaction3Arg = old.sa_flags & 4;
                 }
@@ -119,6 +144,11 @@ namespace ARMeilleure.Signal
 
                     _signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateWindowsSignalHandler(_handlerConfig));
 
+                    if (customSignalHandlerFactory != null)
+                    {
+                        _signalHandlerPtr = customSignalHandlerFactory(IntPtr.Zero, _signalHandlerPtr);
+                    }
+
                     _signalHandlerHandle = WindowsSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
                 }
 
@@ -197,7 +227,7 @@ namespace ARMeilleure.Signal
                 // Only call tracking if in range.
                 context.BranchIfFalse(nextLabel, inRange, BasicBlockFrequency.Cold);
 
-                Operand offset = context.BitwiseAnd(context.Subtract(faultAddress, rangeAddress), Const(~PageMask));
+                Operand offset = context.BitwiseAnd(context.Subtract(faultAddress, rangeAddress), Const(~_pageMask));
 
                 // Call the tracking action, with the pointer's relative offset to the base address.
                 Operand trackingActionPtr = context.Load(OperandType.I64, Const((ulong)signalStructPtr + rangeBaseOffset + 20));
@@ -208,7 +238,7 @@ namespace ARMeilleure.Signal
 
                 // Tracking action should be non-null to call it, otherwise assume false return.
                 context.BranchIfFalse(skipActionLabel, trackingActionPtr);
-                Operand result = context.Call(trackingActionPtr, OperandType.I32, offset, Const(PageSize), isWrite, Const(0));
+                Operand result = context.Call(trackingActionPtr, OperandType.I32, offset, Const(_pageSize), isWrite, Const(0));
                 context.Copy(inRegionLocal, result);
 
                 context.MarkLabel(skipActionLabel);
@@ -278,7 +308,7 @@ namespace ARMeilleure.Signal
 
             OperandType[] argTypes = new OperandType[] { OperandType.I32, OperandType.I64, OperandType.I64 };
 
-            return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq).Map<UnixExceptionHandler>();
+            return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<UnixExceptionHandler>();
         }
 
         private static VectoredExceptionHandler GenerateWindowsSignalHandler(IntPtr signalStructPtr)
@@ -332,7 +362,7 @@ namespace ARMeilleure.Signal
 
             OperandType[] argTypes = new OperandType[] { OperandType.I64 };
 
-            return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<VectoredExceptionHandler>();
+            return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<VectoredExceptionHandler>();
         }
     }
 }
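
Note: InitializeSignalHandler now takes an optional factory that receives the previously
installed native handler and the generated one, and returns the pointer that actually
gets registered. This lets an embedder interpose its own trampoline; on Windows the old
handler slot is passed as IntPtr.Zero, since vectored handlers already chain. An
illustrative call, with a hypothetical trampoline generator:

    NativeSignalHandler.InitializeSignalHandler((oldHandler, ourHandler) =>
    {
        // Hypothetical: emit a stub that tries ourHandler first and falls back
        // to oldHandler for faults that are not ours.
        return MySignalTrampoline.Generate(oldHandler, ourHandler);
    });
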
diff --git a/ARMeilleure/Signal/TestMethods.cs b/ARMeilleure/Signal/TestMethods.cs
index 2d7cef166e..e2ecad2428 100644
--- a/ARMeilleure/Signal/TestMethods.cs
+++ b/ARMeilleure/Signal/TestMethods.cs
@@ -1,7 +1,7 @@
 using ARMeilleure.IntermediateRepresentation;
 using ARMeilleure.Translation;
 using System;
-
+using System.Runtime.InteropServices;
 using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
 
 namespace ARMeilleure.Signal
@@ -32,7 +32,7 @@ namespace ARMeilleure.Signal
 
             OperandType[] argTypes = new OperandType[] { OperandType.I64 };
 
-            return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<DebugPartialUnmap>();
+            return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugPartialUnmap>();
         }
 
         public static DebugThreadLocalMapGetOrReserve GenerateDebugThreadLocalMapGetOrReserve(IntPtr structPtr)
@@ -49,7 +49,7 @@ namespace ARMeilleure.Signal
 
             OperandType[] argTypes = new OperandType[] { OperandType.I64 };
 
-            return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<DebugThreadLocalMapGetOrReserve>();
+            return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugThreadLocalMapGetOrReserve>();
         }
 
         public static DebugNativeWriteLoop GenerateDebugNativeWriteLoop()
@@ -78,7 +78,7 @@ namespace ARMeilleure.Signal
 
             OperandType[] argTypes = new OperandType[] { OperandType.I64 };
 
-            return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq).Map<DebugNativeWriteLoop>();
+            return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugNativeWriteLoop>();
         }
     }
 }
diff --git a/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs b/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs
index 945a01dae4..22009240bb 100644
--- a/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs
+++ b/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs
@@ -3,23 +3,23 @@ using System.Runtime.InteropServices;
 
 namespace ARMeilleure.Signal
 {
-    [StructLayout(LayoutKind.Sequential, Pack = 1)]
-    unsafe struct SigSet
-    {
-        fixed long sa_mask[16];
-    }
-
-    [StructLayout(LayoutKind.Sequential, Pack = 1)]
-    struct SigAction
-    {
-        public IntPtr sa_handler;
-        public SigSet sa_mask;
-        public int sa_flags;
-        public IntPtr sa_restorer;
-    }
-
     static partial class UnixSignalHandlerRegistration
     {
+        [StructLayout(LayoutKind.Sequential, Pack = 1)]
+        public unsafe struct SigSet
+        {
+            fixed long sa_mask[16];
+        }
+
+        [StructLayout(LayoutKind.Sequential, Pack = 1)]
+        public struct SigAction
+        {
+            public IntPtr sa_handler;
+            public SigSet sa_mask;
+            public int sa_flags;
+            public IntPtr sa_restorer;
+        }
+
         private const int SIGSEGV = 11;
         private const int SIGBUS = 10;
         private const int SA_SIGINFO = 0x00000004;
@@ -27,9 +27,24 @@ namespace ARMeilleure.Signal
         [LibraryImport("libc", SetLastError = true)]
         private static partial int sigaction(int signum, ref SigAction sigAction, out SigAction oldAction);
 
+        [LibraryImport("libc", SetLastError = true)]
+        private static partial int sigaction(int signum, IntPtr sigAction, out SigAction oldAction);
+
         [LibraryImport("libc", SetLastError = true)]
         private static partial int sigemptyset(ref SigSet set);
 
+        public static SigAction GetSegfaultExceptionHandler()
+        {
+            int result = sigaction(SIGSEGV, IntPtr.Zero, out SigAction old);
+
+            if (result != 0)
+            {
+                throw new InvalidOperationException($"Could not get SIGSEGV sigaction. Error: {result}");
+            }
+
+            return old;
+        }
+
         public static SigAction RegisterExceptionHandler(IntPtr action)
         {
             SigAction sig = new SigAction
@@ -49,7 +64,7 @@ namespace ARMeilleure.Signal
 
             if (OperatingSystem.IsMacOS())
             {
-                result = sigaction(SIGBUS, ref sig, out SigAction oldb);
+                result = sigaction(SIGBUS, ref sig, out _);
 
                 if (result != 0)
                 {
diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs
index 48254de4e7..238f85082c 100644
--- a/ARMeilleure/Translation/ArmEmitterContext.cs
+++ b/ARMeilleure/Translation/ArmEmitterContext.cs
@@ -39,6 +39,8 @@ namespace ARMeilleure.Translation
             }
         }
 
+        private bool _pendingQcFlagSync;
+
         public OpCode CurrOp { get; set; }
 
         public IMemoryManager Memory { get; }
@@ -81,6 +83,8 @@ namespace ARMeilleure.Translation
 
         public override Operand Call(MethodInfo info, params Operand[] callArgs)
         {
+            SyncQcFlag();
+
             if (!HasPtc)
             {
                 return base.Call(info, callArgs);
@@ -139,6 +143,51 @@ namespace ARMeilleure.Translation
             _optOpLastFlagSet = null;
         }
 
+        public void SetPendingQcFlagSync()
+        {
+            _pendingQcFlagSync = true;
+        }
+
+        public void SyncQcFlag()
+        {
+            if (_pendingQcFlagSync)
+            {
+                if (Optimizations.UseAdvSimd)
+                {
+                    Operand fpsr = AddIntrinsicInt(Intrinsic.Arm64MrsFpsr);
+
+                    uint qcFlagMask = (uint)FPSR.Qc;
+
+                    Operand qcClearLabel = Label();
+
+                    BranchIfFalse(qcClearLabel, BitwiseAnd(fpsr, Const(qcFlagMask)));
+
+                    AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
+                    InstEmitHelper.SetFpFlag(this, FPState.QcFlag, Const(1));
+
+                    MarkLabel(qcClearLabel);
+                }
+
+                _pendingQcFlagSync = false;
+            }
+        }
+
+        public void ClearQcFlag()
+        {
+            if (Optimizations.UseAdvSimd)
+            {
+                AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
+            }
+        }
+
+        public void ClearQcFlagIfModified()
+        {
+            if (_pendingQcFlagSync && Optimizations.UseAdvSimd)
+            {
+                AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
+            }
+        }
+
         public Operand TryGetComparisonResult(Condition condition)
         {
             if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet)
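
Note: QC (saturation) flag tracking is lazy. An emitter for a saturating AdvSimd
intrinsic lets the host instruction set FPSR.QC and calls SetPendingQcFlagSync();
SyncQcFlag() then folds the host bit into the guest flag once, at block ends and before
managed calls (see the Call override above), while ClearQcFlag()/ClearQcFlagIfModified()
reset the host bit so stale saturation is not picked up. A sketch of the emitter side,
assuming the AddIntrinsic helper the translator already uses:

    // Saturating add: the host instruction sets FPSR.QC on saturation.
    Operand res = context.AddIntrinsic(Intrinsic.Arm64SqaddV | Intrinsic.Arm64V128, n, m);
    context.SetPendingQcFlagSync(); // defer the FPSR.QC -> guest flag copy
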
diff --git a/ARMeilleure/Translation/Cache/JitCache.cs b/ARMeilleure/Translation/Cache/JitCache.cs
index 24affa34ed..f496a8e9ce 100644
--- a/ARMeilleure/Translation/Cache/JitCache.cs
+++ b/ARMeilleure/Translation/Cache/JitCache.cs
@@ -1,6 +1,7 @@
 using ARMeilleure.CodeGen;
 using ARMeilleure.CodeGen.Unwinding;
 using ARMeilleure.Memory;
+using ARMeilleure.Native;
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
@@ -17,6 +18,7 @@ namespace ARMeilleure.Translation.Cache
         private const int CacheSize = 2047 * 1024 * 1024;
 
         private static ReservedRegion _jitRegion;
+        private static JitCacheInvalidation _jitCacheInvalidator;
 
         private static CacheMemoryAllocator _cacheAllocator;
 
@@ -25,8 +27,6 @@ namespace ARMeilleure.Translation.Cache
         private static readonly object _lock = new object();
         private static bool _initialized;
 
-        public static IntPtr Base => _jitRegion.Pointer;
-
         public static void Initialize(IJitMemoryAllocator allocator)
         {
             if (_initialized) return;
@@ -36,6 +36,7 @@ namespace ARMeilleure.Translation.Cache
                 if (_initialized) return;
 
                 _jitRegion = new ReservedRegion(allocator, CacheSize);
+                _jitCacheInvalidator = new JitCacheInvalidation(allocator);
 
                 _cacheAllocator = new CacheMemoryAllocator(CacheSize);
 
@@ -60,11 +61,24 @@ namespace ARMeilleure.Translation.Cache
 
                 IntPtr funcPtr = _jitRegion.Pointer + funcOffset;
 
-                ReprotectAsWritable(funcOffset, code.Length);
+                if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+                {
+                    unsafe
+                    {
+                        fixed (byte* codePtr = code)
+                        {
+                            JitSupportDarwin.Copy(funcPtr, (IntPtr)codePtr, (ulong)code.Length);
+                        }
+                    }
+                }
+                else
+                {
+                    ReprotectAsWritable(funcOffset, code.Length);
+                    Marshal.Copy(code, 0, funcPtr, code.Length);
+                    ReprotectAsExecutable(funcOffset, code.Length);
 
-                Marshal.Copy(code, 0, funcPtr, code.Length);
-
-                ReprotectAsExecutable(funcOffset, code.Length);
+                    _jitCacheInvalidator.Invalidate(funcPtr, (ulong)code.Length);
+                }
 
                 Add(funcOffset, code.Length, func.UnwindInfo);
 
diff --git a/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs b/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs
new file mode 100644
index 0000000000..ec2ae73bb9
--- /dev/null
+++ b/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs
@@ -0,0 +1,79 @@
+using ARMeilleure.Memory;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation.Cache
+{
+    class JitCacheInvalidation
+    {
+        private static readonly int[] _invalidationCode = new int[]
+        {
+            unchecked((int)0xd53b0022), // mrs  x2, ctr_el0
+            unchecked((int)0xd3504c44), // ubfx x4, x2, #16, #4
+            unchecked((int)0x52800083), // mov  w3, #0x4
+            unchecked((int)0x12000c45), // and  w5, w2, #0xf
+            unchecked((int)0x1ac42064), // lsl  w4, w3, w4
+            unchecked((int)0x51000482), // sub  w2, w4, #0x1
+            unchecked((int)0x8a220002), // bic  x2, x0, x2
+            unchecked((int)0x1ac52063), // lsl  w3, w3, w5
+            unchecked((int)0xeb01005f), // cmp  x2, x1
+            unchecked((int)0x93407c84), // sxtw x4, w4
+            unchecked((int)0x540000a2), // b.cs 3c <do_ic_clear>
+            unchecked((int)0xd50b7b22), // dc   cvau, x2
+            unchecked((int)0x8b040042), // add  x2, x2, x4
+            unchecked((int)0xeb02003f), // cmp  x1, x2
+            unchecked((int)0x54ffffa8), // b.hi 2c <dc_clear_loop>
+            unchecked((int)0xd5033b9f), // dsb  ish
+            unchecked((int)0x51000462), // sub  w2, w3, #0x1
+            unchecked((int)0x93407c63), // sxtw x3, w3
+            unchecked((int)0x8a220000), // bic  x0, x0, x2
+            unchecked((int)0xeb00003f), // cmp  x1, x0
+            unchecked((int)0x540000a9), // b.ls 64 <exit>
+            unchecked((int)0xd50b7520), // ic   ivau, x0
+            unchecked((int)0x8b030000), // add  x0, x0, x3
+            unchecked((int)0xeb00003f), // cmp  x1, x0
+            unchecked((int)0x54ffffa8), // b.hi 54 <ic_clear_loop>
+            unchecked((int)0xd5033b9f), // dsb  ish
+            unchecked((int)0xd5033fdf), // isb
+            unchecked((int)0xd65f03c0), // ret
+        };
+
+        private delegate void InvalidateCache(ulong start, ulong end);
+
+        private InvalidateCache _invalidateCache;
+        private ReservedRegion _invalidateCacheCodeRegion;
+
+        private readonly bool _needsInvalidation;
+
+        public JitCacheInvalidation(IJitMemoryAllocator allocator)
+        {
+            // On macOS, a different path is used to write to the JIT cache; that path also performs the invalidation.
+            if (!OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+            {
+                ulong size = (ulong)_invalidationCode.Length * sizeof(int);
+                ulong mask = (ulong)ReservedRegion.DefaultGranularity - 1;
+
+                size = (size + mask) & ~mask;
+
+                _invalidateCacheCodeRegion = new ReservedRegion(allocator, size);
+                _invalidateCacheCodeRegion.ExpandIfNeeded(size);
+
+                Marshal.Copy(_invalidationCode, 0, _invalidateCacheCodeRegion.Pointer, _invalidationCode.Length);
+
+                _invalidateCacheCodeRegion.Block.MapAsRx(0, size);
+
+                _invalidateCache = Marshal.GetDelegateForFunctionPointer<InvalidateCache>(_invalidateCacheCodeRegion.Pointer);
+
+                _needsInvalidation = true;
+            }
+        }
+
+        public void Invalidate(IntPtr basePointer, ulong size)
+        {
+            if (_needsInvalidation)
+            {
+                _invalidateCache((ulong)basePointer, (ulong)basePointer + size);
+            }
+        }
+    }
+}
\ No newline at end of file
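
Note: the stub is the conventional AArch64 cache-maintenance sequence: read CTR_EL0,
derive the D- and I-cache line sizes, "dc cvau" each data line in the range, "dsb ish",
"ic ivau" each instruction line, then "dsb ish; isb". For reference, the line-size
derivation performed by the first six instructions, transcribed into C# (ctr stands for
the CTR_EL0 value; the helper is illustrative):

    static (ulong dLineBytes, ulong iLineBytes) DecodeCtrEl0(ulong ctr)
    {
        ulong iminLine = ctr & 0xF;         // log2 of the I-cache line size, in words
        ulong dminLine = (ctr >> 16) & 0xF; // log2 of the D-cache line size, in words
        return (4UL << (int)dminLine, 4UL << (int)iminLine); // a word is 4 bytes
    }
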
diff --git a/ARMeilleure/Translation/Compiler.cs b/ARMeilleure/Translation/Compiler.cs
index 817bd487e0..d4aa5cd96b 100644
--- a/ARMeilleure/Translation/Compiler.cs
+++ b/ARMeilleure/Translation/Compiler.cs
@@ -1,8 +1,9 @@
 using ARMeilleure.CodeGen;
 using ARMeilleure.CodeGen.Optimizations;
-using ARMeilleure.CodeGen.X86;
 using ARMeilleure.Diagnostics;
 using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Runtime.InteropServices;
 
 namespace ARMeilleure.Translation
 {
@@ -12,7 +13,8 @@ namespace ARMeilleure.Translation
             ControlFlowGraph cfg,
             OperandType[]    argTypes,
             OperandType      retType,
-            CompilerOptions  options)
+            CompilerOptions  options,
+            Architecture     target)
         {
             CompilerContext cctx = new(cfg, argTypes, retType, options);
 
@@ -49,7 +51,18 @@ namespace ARMeilleure.Translation
                 Logger.EndPass(PassName.RegisterToLocal, cfg);
             }
 
-            return CodeGenerator.Generate(cctx);
+            if (target == Architecture.X64)
+            {
+                return CodeGen.X86.CodeGenerator.Generate(cctx);
+            }
+            else if (target == Architecture.Arm64)
+            {
+                return CodeGen.Arm64.CodeGenerator.Generate(cctx);
+            }
+            else
+            {
+                throw new NotImplementedException(target.ToString());
+            }
         }
     }
 }
\ No newline at end of file
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index f99d6e5168..6f57e1883c 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
         private const string OuterHeaderMagicString = "PTCohd\0\0";
         private const string InnerHeaderMagicString = "PTCihd\0\0";
 
-        private const uint InternalVersion = 4159; //! To be incremented manually for each change to the ARMeilleure project.
+        private const uint InternalVersion = 4114; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string ActualDir = "0";
         private const string BackupDir = "1";
diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs
index 77ccdaeab3..75c4df23e8 100644
--- a/ARMeilleure/Translation/Translator.cs
+++ b/ARMeilleure/Translation/Translator.cs
@@ -14,6 +14,7 @@ using System;
 using System.Collections.Concurrent;
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.Runtime.InteropServices;
 using System.Threading;
 using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
 
@@ -282,7 +283,7 @@ namespace ARMeilleure.Translation
                 options |= CompilerOptions.Relocatable;
             }
 
-            CompiledFunction compiledFunc = Compiler.Compile(cfg, argTypes, retType, options);
+            CompiledFunction compiledFunc = Compiler.Compile(cfg, argTypes, retType, options, RuntimeInformation.ProcessArchitecture);
 
             if (context.HasPtc && !singleStep)
             {
@@ -359,9 +360,14 @@ namespace ARMeilleure.Translation
                     }
                 }
 
-                if (block.Address == context.EntryAddress && !context.HighCq)
+                if (block.Address == context.EntryAddress)
                 {
-                    EmitRejitCheck(context, out counter);
+                    if (!context.HighCq)
+                    {
+                        EmitRejitCheck(context, out counter);
+                    }
+
+                    context.ClearQcFlag();
                 }
 
                 context.CurrBlock = block;
@@ -386,9 +392,14 @@ namespace ARMeilleure.Translation
 
                         bool isLastOp = opcIndex == block.OpCodes.Count - 1;
 
-                        if (isLastOp && block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
+                        if (isLastOp)
                         {
-                            EmitSynchronization(context);
+                            context.SyncQcFlag();
+
+                            if (block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
+                            {
+                                EmitSynchronization(context);
+                            }
                         }
 
                         Operand lblPredicateSkip = default;
diff --git a/ARMeilleure/Translation/TranslatorStubs.cs b/ARMeilleure/Translation/TranslatorStubs.cs
index 67d2bba8e5..6ed84de80b 100644
--- a/ARMeilleure/Translation/TranslatorStubs.cs
+++ b/ARMeilleure/Translation/TranslatorStubs.cs
@@ -171,7 +171,7 @@ namespace ARMeilleure.Translation
             var retType = OperandType.I64;
             var argTypes = new[] { OperandType.I64 };
 
-            var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<GuestFunction>();
+            var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>();
 
             return Marshal.GetFunctionPointerForDelegate(func);
         }
@@ -197,7 +197,7 @@ namespace ARMeilleure.Translation
             var retType = OperandType.I64;
             var argTypes = new[] { OperandType.I64 };
 
-            var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<GuestFunction>();
+            var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>();
 
             return Marshal.GetFunctionPointerForDelegate(func);
         }
@@ -235,7 +235,7 @@ namespace ARMeilleure.Translation
             var retType = OperandType.None;
             var argTypes = new[] { OperandType.I64, OperandType.I64 };
 
-            return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<DispatcherFunction>();
+            return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DispatcherFunction>();
         }
     }
 }
diff --git a/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs b/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs
index 27bb09ccb1..0cf35c17b2 100644
--- a/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs
+++ b/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs
@@ -6,6 +6,6 @@ namespace Ryujinx.Cpu.Jit
     public class JitMemoryAllocator : IJitMemoryAllocator
     {
         public IJitMemoryBlock Allocate(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.None);
-        public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.Reserve);
+        public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.Reserve | MemoryAllocationFlags.Jit);
     }
 }
diff --git a/Ryujinx.Memory/MemoryAllocationFlags.cs b/Ryujinx.Memory/MemoryAllocationFlags.cs
index 313f33e5f4..6f0ef1aa9b 100644
--- a/Ryujinx.Memory/MemoryAllocationFlags.cs
+++ b/Ryujinx.Memory/MemoryAllocationFlags.cs
@@ -35,6 +35,18 @@ namespace Ryujinx.Memory
         /// Indicates that the memory block should support mapping views of a mirrorable memory block.
         /// The block that is to have their views mapped should be created with the <see cref="Mirrorable"/> flag.
         /// </summary>
-        ViewCompatible = 1 << 3
+        ViewCompatible = 1 << 3,
+
+        /// <summary>
+        /// If used with the <see cref="Mirrorable"/> flag, indicates that the memory block will only be used as
+        /// backing storage and will never be accessed directly, so the memory for the block will not be mapped.
+        /// </summary>
+        NoMap = 1 << 4,
+
+        /// <summary>
+        /// Indicates that the memory will be used to store JIT generated code.
+        /// On some platforms, this requires special flags to be passed that will allow the memory to be executable.
+        /// </summary>
+        Jit = 1 << 5
     }
 }
diff --git a/Ryujinx.Memory/MemoryBlock.cs b/Ryujinx.Memory/MemoryBlock.cs
index 6b9d852de3..e1f19c27a8 100644
--- a/Ryujinx.Memory/MemoryBlock.cs
+++ b/Ryujinx.Memory/MemoryBlock.cs
@@ -1,6 +1,6 @@
 using System;
-using System.Collections.Concurrent;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 using System.Threading;
 
 namespace Ryujinx.Memory
@@ -13,10 +13,9 @@ namespace Ryujinx.Memory
         private readonly bool _usesSharedMemory;
         private readonly bool _isMirror;
         private readonly bool _viewCompatible;
+        private readonly bool _forJit;
         private IntPtr _sharedMemory;
         private IntPtr _pointer;
-        private ConcurrentDictionary<MemoryBlock, byte> _viewStorages;
-        private int _viewCount;
 
         /// <summary>
         /// Pointer to the memory block data.
@@ -40,24 +39,27 @@ namespace Ryujinx.Memory
             if (flags.HasFlag(MemoryAllocationFlags.Mirrorable))
             {
                 _sharedMemory = MemoryManagement.CreateSharedMemory(size, flags.HasFlag(MemoryAllocationFlags.Reserve));
-                _pointer = MemoryManagement.MapSharedMemory(_sharedMemory, size);
+
+                if (!flags.HasFlag(MemoryAllocationFlags.NoMap))
+                {
+                    _pointer = MemoryManagement.MapSharedMemory(_sharedMemory, size);
+                }
+
                 _usesSharedMemory = true;
             }
             else if (flags.HasFlag(MemoryAllocationFlags.Reserve))
             {
                 _viewCompatible = flags.HasFlag(MemoryAllocationFlags.ViewCompatible);
-                _pointer = MemoryManagement.Reserve(size, _viewCompatible);
+                _forJit = flags.HasFlag(MemoryAllocationFlags.Jit);
+                _pointer = MemoryManagement.Reserve(size, _forJit, _viewCompatible);
             }
             else
             {
-                _pointer = MemoryManagement.Allocate(size);
+                _forJit = flags.HasFlag(MemoryAllocationFlags.Jit);
+                _pointer = MemoryManagement.Allocate(size, _forJit);
             }
 
             Size = size;
-
-            _viewStorages = new ConcurrentDictionary<MemoryBlock, byte>();
-            _viewStorages.TryAdd(this, 0);
-            _viewCount = 1;
         }
 
         /// <summary>
@@ -104,7 +106,7 @@ namespace Ryujinx.Memory
         /// <exception cref="InvalidMemoryRegionException">Throw when either <paramref name="offset"/> or <paramref name="size"/> are out of range</exception>
         public bool Commit(ulong offset, ulong size)
         {
-            return MemoryManagement.Commit(GetPointerInternal(offset, size), size);
+            return MemoryManagement.Commit(GetPointerInternal(offset, size), size, _forJit);
         }
 
         /// <summary>
@@ -138,11 +140,6 @@ namespace Ryujinx.Memory
                 throw new ArgumentException("The source memory block is not mirrorable, and thus cannot be mapped on the current block.");
             }
 
-            if (_viewStorages.TryAdd(srcBlock, 0))
-            {
-                srcBlock.IncrementViewCount();
-            }
-
             MemoryManagement.MapView(srcBlock._sharedMemory, srcOffset, GetPointerInternal(dstOffset, size), size, this);
         }
 
@@ -403,33 +400,16 @@ namespace Ryujinx.Memory
                 {
                     MemoryManagement.Free(ptr, Size);
                 }
-
-                foreach (MemoryBlock viewStorage in _viewStorages.Keys)
-                {
-                    viewStorage.DecrementViewCount();
-                }
-
-                _viewStorages.Clear();
             }
-        }
 
-        /// <summary>
-        /// Increments the number of views that uses this memory block as storage.
-        /// </summary>
-        private void IncrementViewCount()
-        {
-            Interlocked.Increment(ref _viewCount);
-        }
-
-        /// <summary>
-        /// Decrements the number of views that uses this memory block as storage.
-        /// </summary>
-        private void DecrementViewCount()
-        {
-            if (Interlocked.Decrement(ref _viewCount) == 0 && _sharedMemory != IntPtr.Zero && !_isMirror)
+            if (!_isMirror)
             {
-                MemoryManagement.DestroySharedMemory(_sharedMemory);
-                _sharedMemory = IntPtr.Zero;
+                IntPtr sharedMemory = Interlocked.Exchange(ref _sharedMemory, IntPtr.Zero);
+
+                if (sharedMemory != IntPtr.Zero)
+                {
+                    MemoryManagement.DestroySharedMemory(sharedMemory);
+                }
             }
         }
 
@@ -453,6 +433,16 @@ namespace Ryujinx.Memory
             return true;
         }
 
+        public static ulong GetPageSize()
+        {
+            if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+            {
+                return 1UL << 14;
+            }
+
+            return 1UL << 12;
+        }
+
         private static void ThrowInvalidMemoryRegionException() => throw new InvalidMemoryRegionException();
     }
 }
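
Note: GetPageSize mirrors the check added to NativeSignalHandler above: Apple Silicon
macOS uses 16 KiB pages, and everything else handled here is assumed to use 4 KiB pages.
Typical use is alignment math, e.g.:

    ulong pageMask = MemoryBlock.GetPageSize() - 1;
    ulong alignedDown = address & ~pageMask;            // round down to a page boundary
    ulong alignedUp = (address + pageMask) & ~pageMask; // round up
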
diff --git a/Ryujinx.Memory/MemoryManagement.cs b/Ryujinx.Memory/MemoryManagement.cs
index 7c042eba33..c4b5ac4c90 100644
--- a/Ryujinx.Memory/MemoryManagement.cs
+++ b/Ryujinx.Memory/MemoryManagement.cs
@@ -4,7 +4,7 @@ namespace Ryujinx.Memory
 {
     public static class MemoryManagement
     {
-        public static IntPtr Allocate(ulong size)
+        public static IntPtr Allocate(ulong size, bool forJit)
         {
             if (OperatingSystem.IsWindows())
             {
@@ -12,7 +12,7 @@ namespace Ryujinx.Memory
             }
             else if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS())
             {
-                return MemoryManagementUnix.Allocate(size);
+                return MemoryManagementUnix.Allocate(size, forJit);
             }
             else
             {
@@ -20,7 +20,7 @@ namespace Ryujinx.Memory
             }
         }
 
-        public static IntPtr Reserve(ulong size, bool viewCompatible)
+        public static IntPtr Reserve(ulong size, bool forJit, bool viewCompatible)
         {
             if (OperatingSystem.IsWindows())
             {
@@ -28,7 +28,7 @@ namespace Ryujinx.Memory
             }
             else if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS())
             {
-                return MemoryManagementUnix.Reserve(size);
+                return MemoryManagementUnix.Reserve(size, forJit);
             }
             else
             {
@@ -36,7 +36,7 @@ namespace Ryujinx.Memory
             }
         }
 
-        public static bool Commit(IntPtr address, ulong size)
+        public static bool Commit(IntPtr address, ulong size, bool forJit)
         {
             if (OperatingSystem.IsWindows())
             {
@@ -44,7 +44,7 @@ namespace Ryujinx.Memory
             }
             else if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS())
             {
-                return MemoryManagementUnix.Commit(address, size);
+                return MemoryManagementUnix.Commit(address, size, forJit);
             }
             else
             {
diff --git a/Ryujinx.Memory/MemoryManagementUnix.cs b/Ryujinx.Memory/MemoryManagementUnix.cs
index df3fcea91b..affcff92b7 100644
--- a/Ryujinx.Memory/MemoryManagementUnix.cs
+++ b/Ryujinx.Memory/MemoryManagementUnix.cs
@@ -13,17 +13,17 @@ namespace Ryujinx.Memory
     {
         private static readonly ConcurrentDictionary<IntPtr, ulong> _allocations = new ConcurrentDictionary<IntPtr, ulong>();
 
-        public static IntPtr Allocate(ulong size)
+        public static IntPtr Allocate(ulong size, bool forJit)
         {
-            return AllocateInternal(size, MmapProts.PROT_READ | MmapProts.PROT_WRITE);
+            return AllocateInternal(size, MmapProts.PROT_READ | MmapProts.PROT_WRITE, forJit);
         }
 
-        public static IntPtr Reserve(ulong size)
+        public static IntPtr Reserve(ulong size, bool forJit)
         {
-            return AllocateInternal(size, MmapProts.PROT_NONE);
+            return AllocateInternal(size, MmapProts.PROT_NONE, forJit);
         }
 
-        private static IntPtr AllocateInternal(ulong size, MmapProts prot, bool shared = false)
+        private static IntPtr AllocateInternal(ulong size, MmapProts prot, bool forJit, bool shared = false)
         {
             MmapFlags flags = MmapFlags.MAP_ANONYMOUS;
 
@@ -41,6 +41,16 @@ namespace Ryujinx.Memory
                 flags |= MmapFlags.MAP_NORESERVE;
             }
 
+            if (OperatingSystem.IsMacOSVersionAtLeast(10, 14) && forJit)
+            {
+                flags |= MmapFlags.MAP_JIT_DARWIN;
+
+                if (prot == (MmapProts.PROT_READ | MmapProts.PROT_WRITE))
+                {
+                    prot |= MmapProts.PROT_EXEC;
+                }
+            }
+
             IntPtr ptr = mmap(IntPtr.Zero, size, prot, flags, -1, 0);
 
             if (ptr == new IntPtr(-1L))
@@ -57,9 +67,16 @@ namespace Ryujinx.Memory
             return ptr;
         }
 
-        public static bool Commit(IntPtr address, ulong size)
+        public static bool Commit(IntPtr address, ulong size, bool forJit)
         {
-            return mprotect(address, size, MmapProts.PROT_READ | MmapProts.PROT_WRITE) == 0;
+            MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE;
+
+            if (OperatingSystem.IsMacOSVersionAtLeast(10, 14) && forJit)
+            {
+                prot |= MmapProts.PROT_EXEC;
+            }
+
+            return mprotect(address, size, prot) == 0;
         }
 
         public static bool Decommit(IntPtr address, ulong size)
diff --git a/Ryujinx.Memory/MemoryManagerUnixHelper.cs b/Ryujinx.Memory/MemoryManagerUnixHelper.cs
index 87a81a79b2..204f1ca4dd 100644
--- a/Ryujinx.Memory/MemoryManagerUnixHelper.cs
+++ b/Ryujinx.Memory/MemoryManagerUnixHelper.cs
@@ -22,7 +22,8 @@ namespace Ryujinx.Memory
             MAP_ANONYMOUS = 4,
             MAP_NORESERVE = 8,
             MAP_FIXED = 16,
-            MAP_UNLOCKED = 32
+            MAP_UNLOCKED = 32,
+            MAP_JIT_DARWIN = 0x800
         }
 
         [Flags]
@@ -45,7 +46,6 @@ namespace Ryujinx.Memory
         private const int MAP_UNLOCKED_LINUX_GENERIC = 0x80000;
 
         private const int MAP_NORESERVE_DARWIN = 0x40;
-        private const int MAP_JIT_DARWIN = 0x800;
         private const int MAP_ANONYMOUS_DARWIN = 0x1000;
 
         public const int MADV_DONTNEED = 4;
@@ -151,10 +151,9 @@ namespace Ryujinx.Memory
                 }
             }
 
-            if (OperatingSystem.IsMacOSVersionAtLeast(10, 14))
+            if (flags.HasFlag(MmapFlags.MAP_JIT_DARWIN) && OperatingSystem.IsMacOSVersionAtLeast(10, 14))
             {
-                // Only to be used with the Hardened Runtime.
-                // result |= MAP_JIT_DARWIN;
+                result |= (int)MmapFlags.MAP_JIT_DARWIN;
             }
 
             return result;