From 45ce540b9b756f372840e923b73cfd7e3edd85f8 Mon Sep 17 00:00:00 2001
From: Wunk <wunkolo@gmail.com>
Date: Sun, 2 Oct 2022 02:17:19 -0700
Subject: [PATCH] ARMeilleure: Add `gfni` acceleration (#3669)

* ARMeilleure: Add `GFNI` detection

This is intended for utilizing the `gf2p8affineqb` instruction

* ARMeilleure: Add `gf2p8affineqb`

The VEX and EVEX forms of this instruction are intentionally not used.
There are `GFNI` chips that do not support AVX (and so have no VEX
encoding), such as Tremont (Lakefield) chips as well as Jasper Lake.

https://github.com/InstLatx64/InstLatx64/blob/13df339fe7150b114929f71b19a6b2fe72fc751e/GenuineIntel/GenuineIntel00806A1_Lakefield_LC_InstLatX64.txt#L1297-L1299

https://github.com/InstLatx64/InstLatx64/blob/13df339fe7150b114929f71b19a6b2fe72fc751e/GenuineIntel/GenuineIntel00906C0_JasperLake_InstLatX64.txt#L1252-L1254

* ARMeilleure: Add `gfni` acceleration of `Rbit_V`

Passes all `Rbit_V*` unit tests on my `i9-11900k`

* ARMeilleure: Add `gfni` acceleration of `S{l,r}i_V`

Also added a fast-path for when the shift amount is greater than the
size of the element.

* ARMeilleure: Add `gfni` acceleration of `Shl_V` and `Sshr_V`

* ARMeilleure: Increment InternalVersion

* ARMeilleure: Fix Intrinsic and Assembler Table alignment

`gf2p8affineqb` is the longest instruction name I know of. It shouldn't
get any wider than this.

* ARMeilleure: Remove SSE2+SHA requirement for GFNI

* ARMeilleure: Add `X86GetGf2p8LogicalShiftLeft`

Used to generate GF(2^8) 8x8 bit-matrices for bit-shifting for the `gf2p8affineqb` instruction.

* ARMeilleure: Append `FeatureInfo7Ecx` to `FeatureInfo`
---
 ARMeilleure/CodeGen/X86/AssemblerTable.cs     | 437 +++++++++---------
 .../CodeGen/X86/HardwareCapabilities.cs       |  11 +-
 ARMeilleure/CodeGen/X86/IntrinsicTable.cs     | 341 +++++++-------
 ARMeilleure/CodeGen/X86/X86Instruction.cs     |   1 +
 .../Instructions/InstEmitSimdHelper.cs        |  15 +
 .../Instructions/InstEmitSimdLogical.cs       |  43 +-
 ARMeilleure/Instructions/InstEmitSimdShift.cs | 136 +++++-
 .../IntermediateRepresentation/Intrinsic.cs   |   1 +
 ARMeilleure/Optimizations.cs                  |   2 +
 ARMeilleure/Translation/PTC/Ptc.cs            |  11 +-
 10 files changed, 589 insertions(+), 409 deletions(-)

diff --git a/ARMeilleure/CodeGen/X86/AssemblerTable.cs b/ARMeilleure/CodeGen/X86/AssemblerTable.cs
index e40ffad484..ecdc029f96 100644
--- a/ARMeilleure/CodeGen/X86/AssemblerTable.cs
+++ b/ARMeilleure/CodeGen/X86/AssemblerTable.cs
@@ -62,224 +62,225 @@ namespace ARMeilleure.CodeGen.X86
             _instTable = new InstructionInfo[(int)X86Instruction.Count];
 
             //  Name                                             RM/R        RM/I8       RM/I32      R/I64       R/RM        Flags
-            Add(X86Instruction.Add,          new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp,      0x00000003, InstructionFlags.None));
-            Add(X86Instruction.Addpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Addps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex));
-            Add(X86Instruction.Addsd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Addss,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Aesdec,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38de, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Aesdeclast,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38df, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Aesenc,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38dc, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Aesenclast,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38dd, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Aesimc,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38db, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.And,          new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp,      0x00000023, InstructionFlags.None));
-            Add(X86Instruction.Andnpd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Andnps,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f55, InstructionFlags.Vex));
-            Add(X86Instruction.Andpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f54, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Andps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f54, InstructionFlags.Vex));
-            Add(X86Instruction.Blendvpd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3815, InstructionFlags.Prefix66));
-            Add(X86Instruction.Blendvps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3814, InstructionFlags.Prefix66));
-            Add(X86Instruction.Bsr,          new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fbd, InstructionFlags.None));
-            Add(X86Instruction.Bswap,        new InstructionInfo(0x00000fc8, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.RegOnly));
-            Add(X86Instruction.Call,         new InstructionInfo(0x020000ff, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
-            Add(X86Instruction.Cmovcc,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f40, InstructionFlags.None));
-            Add(X86Instruction.Cmp,          new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp,      0x0000003b, InstructionFlags.None));
-            Add(X86Instruction.Cmppd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Cmpps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex));
-            Add(X86Instruction.Cmpsd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Cmpss,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Cmpxchg,      new InstructionInfo(0x00000fb1, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
-            Add(X86Instruction.Cmpxchg16b,   new InstructionInfo(0x01000fc7, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.RexW));
-            Add(X86Instruction.Cmpxchg8,     new InstructionInfo(0x00000fb0, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Reg8Src));
-            Add(X86Instruction.Comisd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Comiss,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2f, InstructionFlags.Vex));
-            Add(X86Instruction.Crc32,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38f1, InstructionFlags.PrefixF2));
-            Add(X86Instruction.Crc32_16,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38f1, InstructionFlags.PrefixF2 | InstructionFlags.Prefix66));
-            Add(X86Instruction.Crc32_8,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38f0, InstructionFlags.PrefixF2 | InstructionFlags.Reg8Src));
-            Add(X86Instruction.Cvtdq2pd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Cvtdq2ps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5b, InstructionFlags.Vex));
-            Add(X86Instruction.Cvtpd2dq,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Cvtpd2ps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Cvtps2dq,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Cvtps2pd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex));
-            Add(X86Instruction.Cvtsd2si,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Cvtsd2ss,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Cvtsi2sd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Cvtsi2ss,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Cvtss2sd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Cvtss2si,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Div,          new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x060000f7, InstructionFlags.None));
-            Add(X86Instruction.Divpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Divps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex));
-            Add(X86Instruction.Divsd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Divss,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Haddpd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Haddps,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Idiv,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x070000f7, InstructionFlags.None));
-            Add(X86Instruction.Imul,         new InstructionInfo(BadOp,      0x0000006b, 0x00000069, BadOp,      0x00000faf, InstructionFlags.None));
-            Add(X86Instruction.Imul128,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x050000f7, InstructionFlags.None));
-            Add(X86Instruction.Insertps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Jmp,          new InstructionInfo(0x040000ff, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
-            Add(X86Instruction.Ldmxcsr,      new InstructionInfo(0x02000fae, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex));
-            Add(X86Instruction.Lea,          new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x0000008d, InstructionFlags.None));
-            Add(X86Instruction.Maxpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Maxps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex));
-            Add(X86Instruction.Maxsd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Maxss,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Minpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Minps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex));
-            Add(X86Instruction.Minsd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Minss,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Mov,          new InstructionInfo(0x00000089, BadOp,      0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None));
-            Add(X86Instruction.Mov16,        new InstructionInfo(0x00000089, BadOp,      0x000000c7, BadOp,      0x0000008b, InstructionFlags.Prefix66));
-            Add(X86Instruction.Mov8,         new InstructionInfo(0x00000088, 0x000000c6, BadOp,      BadOp,      0x0000008a, InstructionFlags.Reg8Src | InstructionFlags.Reg8Dest));
-            Add(X86Instruction.Movd,         new InstructionInfo(0x00000f7e, BadOp,      BadOp,      BadOp,      0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Movdqu,       new InstructionInfo(0x00000f7f, BadOp,      BadOp,      BadOp,      0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Movhlps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f12, InstructionFlags.Vex));
-            Add(X86Instruction.Movlhps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f16, InstructionFlags.Vex));
-            Add(X86Instruction.Movq,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Movsd,        new InstructionInfo(0x00000f11, BadOp,      BadOp,      BadOp,      0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Movss,        new InstructionInfo(0x00000f11, BadOp,      BadOp,      BadOp,      0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Movsx16,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fbf, InstructionFlags.None));
-            Add(X86Instruction.Movsx32,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000063, InstructionFlags.None));
-            Add(X86Instruction.Movsx8,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fbe, InstructionFlags.Reg8Src));
-            Add(X86Instruction.Movzx16,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fb7, InstructionFlags.None));
-            Add(X86Instruction.Movzx8,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fb6, InstructionFlags.Reg8Src));
-            Add(X86Instruction.Mul128,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x040000f7, InstructionFlags.None));
-            Add(X86Instruction.Mulpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Mulps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex));
-            Add(X86Instruction.Mulsd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Mulss,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Neg,          new InstructionInfo(0x030000f7, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
-            Add(X86Instruction.Not,          new InstructionInfo(0x020000f7, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
-            Add(X86Instruction.Or,           new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp,      0x0000000b, InstructionFlags.None));
-            Add(X86Instruction.Paddb,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Paddd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Paddq,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Paddw,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Palignr,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a0f, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pand,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pandn,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pavgb,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pavgw,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pblendvb,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3810, InstructionFlags.Prefix66));
-            Add(X86Instruction.Pclmulqdq,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a44, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pcmpeqb,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pcmpeqd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pcmpeqq,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pcmpeqw,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pcmpgtb,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pcmpgtd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pcmpgtq,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3837, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pcmpgtw,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pextrb,       new InstructionInfo(0x000f3a14, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pextrd,       new InstructionInfo(0x000f3a16, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pextrq,       new InstructionInfo(0x000f3a16, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pextrw,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pinsrb,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pinsrd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pinsrq,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pinsrw,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmaxsb,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmaxsd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmaxsw,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmaxub,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmaxud,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383f, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmaxuw,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pminsb,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pminsd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3839, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pminsw,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pminub,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pminud,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pminuw,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmovsxbw,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmovsxdq,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmovsxwd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmovzxbw,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmovzxdq,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmovzxwd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmulld,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3840, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pmullw,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pop,          new InstructionInfo(0x0000008f, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
-            Add(X86Instruction.Popcnt,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fb8, InstructionFlags.PrefixF3));
-            Add(X86Instruction.Por,          new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pshufb,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pshufd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pslld,        new InstructionInfo(BadOp,      0x06000f72, BadOp,      BadOp,      0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Pslldq,       new InstructionInfo(BadOp,      0x07000f73, BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psllq,        new InstructionInfo(BadOp,      0x06000f73, BadOp,      BadOp,      0x00000ff3, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psllw,        new InstructionInfo(BadOp,      0x06000f71, BadOp,      BadOp,      0x00000ff1, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psrad,        new InstructionInfo(BadOp,      0x04000f72, BadOp,      BadOp,      0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psraw,        new InstructionInfo(BadOp,      0x04000f71, BadOp,      BadOp,      0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psrld,        new InstructionInfo(BadOp,      0x02000f72, BadOp,      BadOp,      0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psrlq,        new InstructionInfo(BadOp,      0x02000f73, BadOp,      BadOp,      0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psrldq,       new InstructionInfo(BadOp,      0x03000f73, BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psrlw,        new InstructionInfo(BadOp,      0x02000f71, BadOp,      BadOp,      0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psubb,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psubd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psubq,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Psubw,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Punpckhbw,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Punpckhdq,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Punpckhqdq,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Punpckhwd,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Punpcklbw,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Punpckldq,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Punpcklqdq,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Punpcklwd,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Push,         new InstructionInfo(BadOp,      0x0000006a, 0x00000068, BadOp,      0x060000ff, InstructionFlags.None));
-            Add(X86Instruction.Pxor,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Rcpps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f53, InstructionFlags.Vex));
-            Add(X86Instruction.Rcpss,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Ror,          new InstructionInfo(0x010000d3, 0x010000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
-            Add(X86Instruction.Roundpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Roundps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Roundsd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Roundss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Rsqrtps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f52, InstructionFlags.Vex));
-            Add(X86Instruction.Rsqrtss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Sar,          new InstructionInfo(0x070000d3, 0x070000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
-            Add(X86Instruction.Setcc,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f90, InstructionFlags.Reg8Dest));
-            Add(X86Instruction.Sha256Msg1,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38cc, InstructionFlags.None));
-            Add(X86Instruction.Sha256Msg2,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38cd, InstructionFlags.None));
-            Add(X86Instruction.Sha256Rnds2,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38cb, InstructionFlags.None));
-            Add(X86Instruction.Shl,          new InstructionInfo(0x040000d3, 0x040000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
-            Add(X86Instruction.Shr,          new InstructionInfo(0x050000d3, 0x050000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
-            Add(X86Instruction.Shufpd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Shufps,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc6, InstructionFlags.Vex));
-            Add(X86Instruction.Sqrtpd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Sqrtps,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex));
-            Add(X86Instruction.Sqrtsd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Sqrtss,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Stmxcsr,      new InstructionInfo(0x03000fae, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex));
-            Add(X86Instruction.Sub,          new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp,      0x0000002b, InstructionFlags.None));
-            Add(X86Instruction.Subpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Subps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex));
-            Add(X86Instruction.Subsd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
-            Add(X86Instruction.Subss,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3));
-            Add(X86Instruction.Test,         new InstructionInfo(0x00000085, BadOp,      0x000000f7, BadOp,      BadOp,      InstructionFlags.None));
-            Add(X86Instruction.Unpckhpd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Unpckhps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f15, InstructionFlags.Vex));
-            Add(X86Instruction.Unpcklpd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Unpcklps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f14, InstructionFlags.Vex));
-            Add(X86Instruction.Vblendvpd,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a4b, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Vblendvps,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Vcvtph2ps,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Vcvtps2ph,    new InstructionInfo(0x000f3a1d, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Vfmadd231ps,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Vfmadd231sd,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
-            Add(X86Instruction.Vfmadd231ss,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Vfmsub231sd,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
-            Add(X86Instruction.Vfmsub231ss,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Vfnmadd231ps, new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Vfnmadd231sd, new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
-            Add(X86Instruction.Vfnmadd231ss, new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
-            Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Vpblendvb,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Xor,          new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp,      0x00000033, InstructionFlags.None));
-            Add(X86Instruction.Xorpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
-            Add(X86Instruction.Xorps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f57, InstructionFlags.Vex));
+            Add(X86Instruction.Add,           new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp,      0x00000003, InstructionFlags.None));
+            Add(X86Instruction.Addpd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Addps,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex));
+            Add(X86Instruction.Addsd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Addss,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Aesdec,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38de, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Aesdeclast,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38df, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Aesenc,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38dc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Aesenclast,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38dd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Aesimc,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38db, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.And,           new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp,      0x00000023, InstructionFlags.None));
+            Add(X86Instruction.Andnpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Andnps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f55, InstructionFlags.Vex));
+            Add(X86Instruction.Andpd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f54, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Andps,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f54, InstructionFlags.Vex));
+            Add(X86Instruction.Blendvpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3815, InstructionFlags.Prefix66));
+            Add(X86Instruction.Blendvps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3814, InstructionFlags.Prefix66));
+            Add(X86Instruction.Bsr,           new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fbd, InstructionFlags.None));
+            Add(X86Instruction.Bswap,         new InstructionInfo(0x00000fc8, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.RegOnly));
+            Add(X86Instruction.Call,          new InstructionInfo(0x020000ff, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Cmovcc,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f40, InstructionFlags.None));
+            Add(X86Instruction.Cmp,           new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp,      0x0000003b, InstructionFlags.None));
+            Add(X86Instruction.Cmppd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Cmpps,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex));
+            Add(X86Instruction.Cmpsd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Cmpss,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Cmpxchg,       new InstructionInfo(0x00000fb1, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Cmpxchg16b,    new InstructionInfo(0x01000fc7, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.RexW));
+            Add(X86Instruction.Cmpxchg8,      new InstructionInfo(0x00000fb0, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Reg8Src));
+            Add(X86Instruction.Comisd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Comiss,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2f, InstructionFlags.Vex));
+            Add(X86Instruction.Crc32,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38f1, InstructionFlags.PrefixF2));
+            Add(X86Instruction.Crc32_16,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38f1, InstructionFlags.PrefixF2 | InstructionFlags.Prefix66));
+            Add(X86Instruction.Crc32_8,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38f0, InstructionFlags.PrefixF2 | InstructionFlags.Reg8Src));
+            Add(X86Instruction.Cvtdq2pd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Cvtdq2ps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5b, InstructionFlags.Vex));
+            Add(X86Instruction.Cvtpd2dq,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Cvtpd2ps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Cvtps2dq,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Cvtps2pd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex));
+            Add(X86Instruction.Cvtsd2si,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Cvtsd2ss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Cvtsi2sd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Cvtsi2ss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Cvtss2sd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Cvtss2si,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Div,           new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x060000f7, InstructionFlags.None));
+            Add(X86Instruction.Divpd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Divps,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex));
+            Add(X86Instruction.Divsd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Divss,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Gf2p8affineqb, new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3ace, InstructionFlags.Prefix66));
+            Add(X86Instruction.Haddpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Haddps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Idiv,          new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x070000f7, InstructionFlags.None));
+            Add(X86Instruction.Imul,          new InstructionInfo(BadOp,      0x0000006b, 0x00000069, BadOp,      0x00000faf, InstructionFlags.None));
+            Add(X86Instruction.Imul128,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x050000f7, InstructionFlags.None));
+            Add(X86Instruction.Insertps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Jmp,           new InstructionInfo(0x040000ff, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Ldmxcsr,       new InstructionInfo(0x02000fae, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex));
+            Add(X86Instruction.Lea,           new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x0000008d, InstructionFlags.None));
+            Add(X86Instruction.Maxpd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Maxps,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex));
+            Add(X86Instruction.Maxsd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Maxss,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Minpd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Minps,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex));
+            Add(X86Instruction.Minsd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Minss,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Mov,           new InstructionInfo(0x00000089, BadOp,      0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None));
+            Add(X86Instruction.Mov16,         new InstructionInfo(0x00000089, BadOp,      0x000000c7, BadOp,      0x0000008b, InstructionFlags.Prefix66));
+            Add(X86Instruction.Mov8,          new InstructionInfo(0x00000088, 0x000000c6, BadOp,      BadOp,      0x0000008a, InstructionFlags.Reg8Src | InstructionFlags.Reg8Dest));
+            Add(X86Instruction.Movd,          new InstructionInfo(0x00000f7e, BadOp,      BadOp,      BadOp,      0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Movdqu,        new InstructionInfo(0x00000f7f, BadOp,      BadOp,      BadOp,      0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Movhlps,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f12, InstructionFlags.Vex));
+            Add(X86Instruction.Movlhps,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f16, InstructionFlags.Vex));
+            Add(X86Instruction.Movq,          new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Movsd,         new InstructionInfo(0x00000f11, BadOp,      BadOp,      BadOp,      0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Movss,         new InstructionInfo(0x00000f11, BadOp,      BadOp,      BadOp,      0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Movsx16,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fbf, InstructionFlags.None));
+            Add(X86Instruction.Movsx32,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000063, InstructionFlags.None));
+            Add(X86Instruction.Movsx8,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fbe, InstructionFlags.Reg8Src));
+            Add(X86Instruction.Movzx16,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fb7, InstructionFlags.None));
+            Add(X86Instruction.Movzx8,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fb6, InstructionFlags.Reg8Src));
+            Add(X86Instruction.Mul128,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x040000f7, InstructionFlags.None));
+            Add(X86Instruction.Mulpd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Mulps,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex));
+            Add(X86Instruction.Mulsd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Mulss,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Neg,           new InstructionInfo(0x030000f7, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Not,           new InstructionInfo(0x020000f7, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Or,            new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp,      0x0000000b, InstructionFlags.None));
+            Add(X86Instruction.Paddb,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Paddd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Paddq,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Paddw,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Palignr,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a0f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pand,          new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pandn,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pavgb,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pavgw,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pblendvb,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3810, InstructionFlags.Prefix66));
+            Add(X86Instruction.Pclmulqdq,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a44, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpeqb,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpeqd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpeqq,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpeqw,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpgtb,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpgtd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpgtq,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3837, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpgtw,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pextrb,        new InstructionInfo(0x000f3a14, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pextrd,        new InstructionInfo(0x000f3a16, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pextrq,        new InstructionInfo(0x000f3a16, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pextrw,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pinsrb,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pinsrd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pinsrq,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pinsrw,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxsb,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxsd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxsw,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxub,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxud,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxuw,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminsb,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminsd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3839, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminsw,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminub,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminud,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminuw,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovsxbw,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovsxdq,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovsxwd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovzxbw,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovzxdq,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovzxwd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmulld,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3840, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmullw,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pop,           new InstructionInfo(0x0000008f, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Popcnt,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fb8, InstructionFlags.PrefixF3));
+            Add(X86Instruction.Por,           new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pshufb,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pshufd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pslld,         new InstructionInfo(BadOp,      0x06000f72, BadOp,      BadOp,      0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pslldq,        new InstructionInfo(BadOp,      0x07000f73, BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psllq,         new InstructionInfo(BadOp,      0x06000f73, BadOp,      BadOp,      0x00000ff3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psllw,         new InstructionInfo(BadOp,      0x06000f71, BadOp,      BadOp,      0x00000ff1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psrad,         new InstructionInfo(BadOp,      0x04000f72, BadOp,      BadOp,      0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psraw,         new InstructionInfo(BadOp,      0x04000f71, BadOp,      BadOp,      0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psrld,         new InstructionInfo(BadOp,      0x02000f72, BadOp,      BadOp,      0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psrlq,         new InstructionInfo(BadOp,      0x02000f73, BadOp,      BadOp,      0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psrldq,        new InstructionInfo(BadOp,      0x03000f73, BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psrlw,         new InstructionInfo(BadOp,      0x02000f71, BadOp,      BadOp,      0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psubb,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psubd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psubq,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psubw,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpckhbw,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpckhdq,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpckhqdq,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpckhwd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpcklbw,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpckldq,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpcklqdq,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpcklwd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Push,          new InstructionInfo(BadOp,      0x0000006a, 0x00000068, BadOp,      0x060000ff, InstructionFlags.None));
+            Add(X86Instruction.Pxor,          new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Rcpps,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f53, InstructionFlags.Vex));
+            Add(X86Instruction.Rcpss,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Ror,           new InstructionInfo(0x010000d3, 0x010000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Roundpd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Roundps,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Roundsd,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Roundss,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Rsqrtps,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f52, InstructionFlags.Vex));
+            Add(X86Instruction.Rsqrtss,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Sar,           new InstructionInfo(0x070000d3, 0x070000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Setcc,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f90, InstructionFlags.Reg8Dest));
+            Add(X86Instruction.Sha256Msg1,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38cc, InstructionFlags.None));
+            Add(X86Instruction.Sha256Msg2,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38cd, InstructionFlags.None));
+            Add(X86Instruction.Sha256Rnds2,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38cb, InstructionFlags.None));
+            Add(X86Instruction.Shl,           new InstructionInfo(0x040000d3, 0x040000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Shr,           new InstructionInfo(0x050000d3, 0x050000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Shufpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Shufps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc6, InstructionFlags.Vex));
+            Add(X86Instruction.Sqrtpd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Sqrtps,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex));
+            Add(X86Instruction.Sqrtsd,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Sqrtss,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Stmxcsr,       new InstructionInfo(0x03000fae, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex));
+            Add(X86Instruction.Sub,           new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp,      0x0000002b, InstructionFlags.None));
+            Add(X86Instruction.Subpd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Subps,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex));
+            Add(X86Instruction.Subsd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Subss,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Test,          new InstructionInfo(0x00000085, BadOp,      0x000000f7, BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Unpckhpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Unpckhps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f15, InstructionFlags.Vex));
+            Add(X86Instruction.Unpcklpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Unpcklps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f14, InstructionFlags.Vex));
+            Add(X86Instruction.Vblendvpd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a4b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Vblendvps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Vcvtph2ps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Vcvtps2ph,     new InstructionInfo(0x000f3a1d, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Vfmadd231ps,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Vfmadd231sd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+            Add(X86Instruction.Vfmadd231ss,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Vfmsub231sd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+            Add(X86Instruction.Vfmsub231ss,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Vfnmadd231ps,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Vfnmadd231sd,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+            Add(X86Instruction.Vfnmadd231ss,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Vfnmsub231sd,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+            Add(X86Instruction.Vfnmsub231ss,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Vpblendvb,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Xor,           new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp,      0x00000033, InstructionFlags.None));
+            Add(X86Instruction.Xorpd,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Xorps,         new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f57, InstructionFlags.Vex));
 
             static void Add(X86Instruction inst, in InstructionInfo info)
             {
diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
index a29dd5befe..c12a4e28b7 100644
--- a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
+++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -20,8 +20,9 @@ namespace ARMeilleure.CodeGen.X86
 
             if (maxNum >= 7)
             {
-                (_, int ebx7, _, _) = X86Base.CpuId(0x00000007, 0x00000000);
+                (_, int ebx7, int ecx7, _) = X86Base.CpuId(0x00000007, 0x00000000);
                 FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
+                FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7;
             }
         }
 
@@ -54,9 +55,16 @@ namespace ARMeilleure.CodeGen.X86
             Sha = 1 << 29
         }
 
+        [Flags]
+        public enum FeatureFlags7Ecx
+        {
+            Gfni = 1 << 8, // Bit 8 of ECX returned by CpuId(0x00000007, 0x00000000).
+        }
+
         public static FeatureFlags1Edx FeatureInfo1Edx { get; }
         public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
         public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
+        public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0; // Remains 0 when CPUID leaf 7 is not queried (maxNum < 7).
 
         public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
         public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
@@ -72,6 +80,7 @@ namespace ARMeilleure.CodeGen.X86
         public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
         public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
         public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
+        public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni); // Not gated on SupportsAvx: some GFNI-capable CPUs lack AVX.
 
         public static bool ForceLegacySse { get; set; }
 
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
index ada86cfaec..6407a9a7b4 100644
--- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -13,176 +13,177 @@ namespace ARMeilleure.CodeGen.X86
         {
             _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
 
-            Add(Intrinsic.X86Addpd,        new IntrinsicInfo(X86Instruction.Addpd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Addps,        new IntrinsicInfo(X86Instruction.Addps,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Addsd,        new IntrinsicInfo(X86Instruction.Addsd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Addss,        new IntrinsicInfo(X86Instruction.Addss,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Aesdec,       new IntrinsicInfo(X86Instruction.Aesdec,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Aesdeclast,   new IntrinsicInfo(X86Instruction.Aesdeclast,   IntrinsicType.Binary));
-            Add(Intrinsic.X86Aesenc,       new IntrinsicInfo(X86Instruction.Aesenc,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Aesenclast,   new IntrinsicInfo(X86Instruction.Aesenclast,   IntrinsicType.Binary));
-            Add(Intrinsic.X86Aesimc,       new IntrinsicInfo(X86Instruction.Aesimc,       IntrinsicType.Unary));
-            Add(Intrinsic.X86Andnpd,       new IntrinsicInfo(X86Instruction.Andnpd,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Andnps,       new IntrinsicInfo(X86Instruction.Andnps,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Andpd,        new IntrinsicInfo(X86Instruction.Andpd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Andps,        new IntrinsicInfo(X86Instruction.Andps,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Blendvpd,     new IntrinsicInfo(X86Instruction.Blendvpd,     IntrinsicType.Ternary));
-            Add(Intrinsic.X86Blendvps,     new IntrinsicInfo(X86Instruction.Blendvps,     IntrinsicType.Ternary));
-            Add(Intrinsic.X86Cmppd,        new IntrinsicInfo(X86Instruction.Cmppd,        IntrinsicType.TernaryImm));
-            Add(Intrinsic.X86Cmpps,        new IntrinsicInfo(X86Instruction.Cmpps,        IntrinsicType.TernaryImm));
-            Add(Intrinsic.X86Cmpsd,        new IntrinsicInfo(X86Instruction.Cmpsd,        IntrinsicType.TernaryImm));
-            Add(Intrinsic.X86Cmpss,        new IntrinsicInfo(X86Instruction.Cmpss,        IntrinsicType.TernaryImm));
-            Add(Intrinsic.X86Comisdeq,     new IntrinsicInfo(X86Instruction.Comisd,       IntrinsicType.Comis_));
-            Add(Intrinsic.X86Comisdge,     new IntrinsicInfo(X86Instruction.Comisd,       IntrinsicType.Comis_));
-            Add(Intrinsic.X86Comisdlt,     new IntrinsicInfo(X86Instruction.Comisd,       IntrinsicType.Comis_));
-            Add(Intrinsic.X86Comisseq,     new IntrinsicInfo(X86Instruction.Comiss,       IntrinsicType.Comis_));
-            Add(Intrinsic.X86Comissge,     new IntrinsicInfo(X86Instruction.Comiss,       IntrinsicType.Comis_));
-            Add(Intrinsic.X86Comisslt,     new IntrinsicInfo(X86Instruction.Comiss,       IntrinsicType.Comis_));
-            Add(Intrinsic.X86Crc32,        new IntrinsicInfo(X86Instruction.Crc32,        IntrinsicType.Crc32));
-            Add(Intrinsic.X86Crc32_16,     new IntrinsicInfo(X86Instruction.Crc32_16,     IntrinsicType.Crc32));
-            Add(Intrinsic.X86Crc32_8,      new IntrinsicInfo(X86Instruction.Crc32_8,      IntrinsicType.Crc32));
-            Add(Intrinsic.X86Cvtdq2pd,     new IntrinsicInfo(X86Instruction.Cvtdq2pd,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Cvtdq2ps,     new IntrinsicInfo(X86Instruction.Cvtdq2ps,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Cvtpd2dq,     new IntrinsicInfo(X86Instruction.Cvtpd2dq,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Cvtpd2ps,     new IntrinsicInfo(X86Instruction.Cvtpd2ps,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Cvtps2dq,     new IntrinsicInfo(X86Instruction.Cvtps2dq,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Cvtps2pd,     new IntrinsicInfo(X86Instruction.Cvtps2pd,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Cvtsd2si,     new IntrinsicInfo(X86Instruction.Cvtsd2si,     IntrinsicType.UnaryToGpr));
-            Add(Intrinsic.X86Cvtsd2ss,     new IntrinsicInfo(X86Instruction.Cvtsd2ss,     IntrinsicType.Binary));
-            Add(Intrinsic.X86Cvtsi2sd,     new IntrinsicInfo(X86Instruction.Cvtsi2sd,     IntrinsicType.BinaryGpr));
-            Add(Intrinsic.X86Cvtsi2si,     new IntrinsicInfo(X86Instruction.Movd,         IntrinsicType.UnaryToGpr));
-            Add(Intrinsic.X86Cvtsi2ss,     new IntrinsicInfo(X86Instruction.Cvtsi2ss,     IntrinsicType.BinaryGpr));
-            Add(Intrinsic.X86Cvtss2sd,     new IntrinsicInfo(X86Instruction.Cvtss2sd,     IntrinsicType.Binary));
-            Add(Intrinsic.X86Cvtss2si,     new IntrinsicInfo(X86Instruction.Cvtss2si,     IntrinsicType.UnaryToGpr));
-            Add(Intrinsic.X86Divpd,        new IntrinsicInfo(X86Instruction.Divpd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Divps,        new IntrinsicInfo(X86Instruction.Divps,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Divsd,        new IntrinsicInfo(X86Instruction.Divsd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Divss,        new IntrinsicInfo(X86Instruction.Divss,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Haddpd,       new IntrinsicInfo(X86Instruction.Haddpd,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Haddps,       new IntrinsicInfo(X86Instruction.Haddps,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Insertps,     new IntrinsicInfo(X86Instruction.Insertps,     IntrinsicType.TernaryImm));
-            Add(Intrinsic.X86Maxpd,        new IntrinsicInfo(X86Instruction.Maxpd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Maxps,        new IntrinsicInfo(X86Instruction.Maxps,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Maxsd,        new IntrinsicInfo(X86Instruction.Maxsd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Maxss,        new IntrinsicInfo(X86Instruction.Maxss,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Minpd,        new IntrinsicInfo(X86Instruction.Minpd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Minps,        new IntrinsicInfo(X86Instruction.Minps,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Minsd,        new IntrinsicInfo(X86Instruction.Minsd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Minss,        new IntrinsicInfo(X86Instruction.Minss,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Movhlps,      new IntrinsicInfo(X86Instruction.Movhlps,      IntrinsicType.Binary));
-            Add(Intrinsic.X86Movlhps,      new IntrinsicInfo(X86Instruction.Movlhps,      IntrinsicType.Binary));
-            Add(Intrinsic.X86Movss,        new IntrinsicInfo(X86Instruction.Movss,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Mulpd,        new IntrinsicInfo(X86Instruction.Mulpd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Mulps,        new IntrinsicInfo(X86Instruction.Mulps,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Mulsd,        new IntrinsicInfo(X86Instruction.Mulsd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Mulss,        new IntrinsicInfo(X86Instruction.Mulss,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Mxcsrmb,      new IntrinsicInfo(X86Instruction.None,         IntrinsicType.Mxcsr)); // Mask bits.
-            Add(Intrinsic.X86Mxcsrub,      new IntrinsicInfo(X86Instruction.None,         IntrinsicType.Mxcsr)); // Unmask bits.
-            Add(Intrinsic.X86Paddb,        new IntrinsicInfo(X86Instruction.Paddb,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Paddd,        new IntrinsicInfo(X86Instruction.Paddd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Paddq,        new IntrinsicInfo(X86Instruction.Paddq,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Paddw,        new IntrinsicInfo(X86Instruction.Paddw,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Palignr,      new IntrinsicInfo(X86Instruction.Palignr,      IntrinsicType.TernaryImm));
-            Add(Intrinsic.X86Pand,         new IntrinsicInfo(X86Instruction.Pand,         IntrinsicType.Binary));
-            Add(Intrinsic.X86Pandn,        new IntrinsicInfo(X86Instruction.Pandn,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Pavgb,        new IntrinsicInfo(X86Instruction.Pavgb,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Pavgw,        new IntrinsicInfo(X86Instruction.Pavgw,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Pblendvb,     new IntrinsicInfo(X86Instruction.Pblendvb,     IntrinsicType.Ternary));
-            Add(Intrinsic.X86Pclmulqdq,    new IntrinsicInfo(X86Instruction.Pclmulqdq,    IntrinsicType.TernaryImm));
-            Add(Intrinsic.X86Pcmpeqb,      new IntrinsicInfo(X86Instruction.Pcmpeqb,      IntrinsicType.Binary));
-            Add(Intrinsic.X86Pcmpeqd,      new IntrinsicInfo(X86Instruction.Pcmpeqd,      IntrinsicType.Binary));
-            Add(Intrinsic.X86Pcmpeqq,      new IntrinsicInfo(X86Instruction.Pcmpeqq,      IntrinsicType.Binary));
-            Add(Intrinsic.X86Pcmpeqw,      new IntrinsicInfo(X86Instruction.Pcmpeqw,      IntrinsicType.Binary));
-            Add(Intrinsic.X86Pcmpgtb,      new IntrinsicInfo(X86Instruction.Pcmpgtb,      IntrinsicType.Binary));
-            Add(Intrinsic.X86Pcmpgtd,      new IntrinsicInfo(X86Instruction.Pcmpgtd,      IntrinsicType.Binary));
-            Add(Intrinsic.X86Pcmpgtq,      new IntrinsicInfo(X86Instruction.Pcmpgtq,      IntrinsicType.Binary));
-            Add(Intrinsic.X86Pcmpgtw,      new IntrinsicInfo(X86Instruction.Pcmpgtw,      IntrinsicType.Binary));
-            Add(Intrinsic.X86Pmaxsb,       new IntrinsicInfo(X86Instruction.Pmaxsb,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pmaxsd,       new IntrinsicInfo(X86Instruction.Pmaxsd,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pmaxsw,       new IntrinsicInfo(X86Instruction.Pmaxsw,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pmaxub,       new IntrinsicInfo(X86Instruction.Pmaxub,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pmaxud,       new IntrinsicInfo(X86Instruction.Pmaxud,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pmaxuw,       new IntrinsicInfo(X86Instruction.Pmaxuw,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pminsb,       new IntrinsicInfo(X86Instruction.Pminsb,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pminsd,       new IntrinsicInfo(X86Instruction.Pminsd,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pminsw,       new IntrinsicInfo(X86Instruction.Pminsw,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pminub,       new IntrinsicInfo(X86Instruction.Pminub,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pminud,       new IntrinsicInfo(X86Instruction.Pminud,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pminuw,       new IntrinsicInfo(X86Instruction.Pminuw,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pmovsxbw,     new IntrinsicInfo(X86Instruction.Pmovsxbw,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Pmovsxdq,     new IntrinsicInfo(X86Instruction.Pmovsxdq,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Pmovsxwd,     new IntrinsicInfo(X86Instruction.Pmovsxwd,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Pmovzxbw,     new IntrinsicInfo(X86Instruction.Pmovzxbw,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Pmovzxdq,     new IntrinsicInfo(X86Instruction.Pmovzxdq,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Pmovzxwd,     new IntrinsicInfo(X86Instruction.Pmovzxwd,     IntrinsicType.Unary));
-            Add(Intrinsic.X86Pmulld,       new IntrinsicInfo(X86Instruction.Pmulld,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pmullw,       new IntrinsicInfo(X86Instruction.Pmullw,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Popcnt,       new IntrinsicInfo(X86Instruction.Popcnt,       IntrinsicType.PopCount));
-            Add(Intrinsic.X86Por,          new IntrinsicInfo(X86Instruction.Por,          IntrinsicType.Binary));
-            Add(Intrinsic.X86Pshufb,       new IntrinsicInfo(X86Instruction.Pshufb,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Pshufd,       new IntrinsicInfo(X86Instruction.Pshufd,       IntrinsicType.BinaryImm));
-            Add(Intrinsic.X86Pslld,        new IntrinsicInfo(X86Instruction.Pslld,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Pslldq,       new IntrinsicInfo(X86Instruction.Pslldq,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Psllq,        new IntrinsicInfo(X86Instruction.Psllq,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Psllw,        new IntrinsicInfo(X86Instruction.Psllw,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Psrad,        new IntrinsicInfo(X86Instruction.Psrad,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Psraw,        new IntrinsicInfo(X86Instruction.Psraw,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Psrld,        new IntrinsicInfo(X86Instruction.Psrld,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Psrlq,        new IntrinsicInfo(X86Instruction.Psrlq,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Psrldq,       new IntrinsicInfo(X86Instruction.Psrldq,       IntrinsicType.Binary));
-            Add(Intrinsic.X86Psrlw,        new IntrinsicInfo(X86Instruction.Psrlw,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Psubb,        new IntrinsicInfo(X86Instruction.Psubb,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Psubd,        new IntrinsicInfo(X86Instruction.Psubd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Psubq,        new IntrinsicInfo(X86Instruction.Psubq,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Psubw,        new IntrinsicInfo(X86Instruction.Psubw,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Punpckhbw,    new IntrinsicInfo(X86Instruction.Punpckhbw,    IntrinsicType.Binary));
-            Add(Intrinsic.X86Punpckhdq,    new IntrinsicInfo(X86Instruction.Punpckhdq,    IntrinsicType.Binary));
-            Add(Intrinsic.X86Punpckhqdq,   new IntrinsicInfo(X86Instruction.Punpckhqdq,   IntrinsicType.Binary));
-            Add(Intrinsic.X86Punpckhwd,    new IntrinsicInfo(X86Instruction.Punpckhwd,    IntrinsicType.Binary));
-            Add(Intrinsic.X86Punpcklbw,    new IntrinsicInfo(X86Instruction.Punpcklbw,    IntrinsicType.Binary));
-            Add(Intrinsic.X86Punpckldq,    new IntrinsicInfo(X86Instruction.Punpckldq,    IntrinsicType.Binary));
-            Add(Intrinsic.X86Punpcklqdq,   new IntrinsicInfo(X86Instruction.Punpcklqdq,   IntrinsicType.Binary));
-            Add(Intrinsic.X86Punpcklwd,    new IntrinsicInfo(X86Instruction.Punpcklwd,    IntrinsicType.Binary));
-            Add(Intrinsic.X86Pxor,         new IntrinsicInfo(X86Instruction.Pxor,         IntrinsicType.Binary));
-            Add(Intrinsic.X86Rcpps,        new IntrinsicInfo(X86Instruction.Rcpps,        IntrinsicType.Unary));
-            Add(Intrinsic.X86Rcpss,        new IntrinsicInfo(X86Instruction.Rcpss,        IntrinsicType.Unary));
-            Add(Intrinsic.X86Roundpd,      new IntrinsicInfo(X86Instruction.Roundpd,      IntrinsicType.BinaryImm));
-            Add(Intrinsic.X86Roundps,      new IntrinsicInfo(X86Instruction.Roundps,      IntrinsicType.BinaryImm));
-            Add(Intrinsic.X86Roundsd,      new IntrinsicInfo(X86Instruction.Roundsd,      IntrinsicType.BinaryImm));
-            Add(Intrinsic.X86Roundss,      new IntrinsicInfo(X86Instruction.Roundss,      IntrinsicType.BinaryImm));
-            Add(Intrinsic.X86Rsqrtps,      new IntrinsicInfo(X86Instruction.Rsqrtps,      IntrinsicType.Unary));
-            Add(Intrinsic.X86Rsqrtss,      new IntrinsicInfo(X86Instruction.Rsqrtss,      IntrinsicType.Unary));
-            Add(Intrinsic.X86Sha256Msg1,   new IntrinsicInfo(X86Instruction.Sha256Msg1,   IntrinsicType.Binary));
-            Add(Intrinsic.X86Sha256Msg2,   new IntrinsicInfo(X86Instruction.Sha256Msg2,   IntrinsicType.Binary));
-            Add(Intrinsic.X86Sha256Rnds2,  new IntrinsicInfo(X86Instruction.Sha256Rnds2,  IntrinsicType.Ternary));
-            Add(Intrinsic.X86Shufpd,       new IntrinsicInfo(X86Instruction.Shufpd,       IntrinsicType.TernaryImm));
-            Add(Intrinsic.X86Shufps,       new IntrinsicInfo(X86Instruction.Shufps,       IntrinsicType.TernaryImm));
-            Add(Intrinsic.X86Sqrtpd,       new IntrinsicInfo(X86Instruction.Sqrtpd,       IntrinsicType.Unary));
-            Add(Intrinsic.X86Sqrtps,       new IntrinsicInfo(X86Instruction.Sqrtps,       IntrinsicType.Unary));
-            Add(Intrinsic.X86Sqrtsd,       new IntrinsicInfo(X86Instruction.Sqrtsd,       IntrinsicType.Unary));
-            Add(Intrinsic.X86Sqrtss,       new IntrinsicInfo(X86Instruction.Sqrtss,       IntrinsicType.Unary));
-            Add(Intrinsic.X86Subpd,        new IntrinsicInfo(X86Instruction.Subpd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Subps,        new IntrinsicInfo(X86Instruction.Subps,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Subsd,        new IntrinsicInfo(X86Instruction.Subsd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Subss,        new IntrinsicInfo(X86Instruction.Subss,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Unpckhpd,     new IntrinsicInfo(X86Instruction.Unpckhpd,     IntrinsicType.Binary));
-            Add(Intrinsic.X86Unpckhps,     new IntrinsicInfo(X86Instruction.Unpckhps,     IntrinsicType.Binary));
-            Add(Intrinsic.X86Unpcklpd,     new IntrinsicInfo(X86Instruction.Unpcklpd,     IntrinsicType.Binary));
-            Add(Intrinsic.X86Unpcklps,     new IntrinsicInfo(X86Instruction.Unpcklps,     IntrinsicType.Binary));
-            Add(Intrinsic.X86Vcvtph2ps,    new IntrinsicInfo(X86Instruction.Vcvtph2ps,    IntrinsicType.Unary));
-            Add(Intrinsic.X86Vcvtps2ph,    new IntrinsicInfo(X86Instruction.Vcvtps2ph,    IntrinsicType.BinaryImm));
-            Add(Intrinsic.X86Vfmadd231ps,  new IntrinsicInfo(X86Instruction.Vfmadd231ps,  IntrinsicType.Fma));
-            Add(Intrinsic.X86Vfmadd231sd,  new IntrinsicInfo(X86Instruction.Vfmadd231sd,  IntrinsicType.Fma));
-            Add(Intrinsic.X86Vfmadd231ss,  new IntrinsicInfo(X86Instruction.Vfmadd231ss,  IntrinsicType.Fma));
-            Add(Intrinsic.X86Vfmsub231sd,  new IntrinsicInfo(X86Instruction.Vfmsub231sd,  IntrinsicType.Fma));
-            Add(Intrinsic.X86Vfmsub231ss,  new IntrinsicInfo(X86Instruction.Vfmsub231ss,  IntrinsicType.Fma));
-            Add(Intrinsic.X86Vfnmadd231ps, new IntrinsicInfo(X86Instruction.Vfnmadd231ps, IntrinsicType.Fma));
-            Add(Intrinsic.X86Vfnmadd231sd, new IntrinsicInfo(X86Instruction.Vfnmadd231sd, IntrinsicType.Fma));
-            Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma));
-            Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
-            Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
-            Add(Intrinsic.X86Xorpd,        new IntrinsicInfo(X86Instruction.Xorpd,        IntrinsicType.Binary));
-            Add(Intrinsic.X86Xorps,        new IntrinsicInfo(X86Instruction.Xorps,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Addpd,         new IntrinsicInfo(X86Instruction.Addpd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Addps,         new IntrinsicInfo(X86Instruction.Addps,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Addsd,         new IntrinsicInfo(X86Instruction.Addsd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Addss,         new IntrinsicInfo(X86Instruction.Addss,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Aesdec,        new IntrinsicInfo(X86Instruction.Aesdec,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Aesdeclast,    new IntrinsicInfo(X86Instruction.Aesdeclast,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Aesenc,        new IntrinsicInfo(X86Instruction.Aesenc,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Aesenclast,    new IntrinsicInfo(X86Instruction.Aesenclast,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Aesimc,        new IntrinsicInfo(X86Instruction.Aesimc,        IntrinsicType.Unary));
+            Add(Intrinsic.X86Andnpd,        new IntrinsicInfo(X86Instruction.Andnpd,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Andnps,        new IntrinsicInfo(X86Instruction.Andnps,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Andpd,         new IntrinsicInfo(X86Instruction.Andpd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Andps,         new IntrinsicInfo(X86Instruction.Andps,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Blendvpd,      new IntrinsicInfo(X86Instruction.Blendvpd,      IntrinsicType.Ternary));
+            Add(Intrinsic.X86Blendvps,      new IntrinsicInfo(X86Instruction.Blendvps,      IntrinsicType.Ternary));
+            Add(Intrinsic.X86Cmppd,         new IntrinsicInfo(X86Instruction.Cmppd,         IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Cmpps,         new IntrinsicInfo(X86Instruction.Cmpps,         IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Cmpsd,         new IntrinsicInfo(X86Instruction.Cmpsd,         IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Cmpss,         new IntrinsicInfo(X86Instruction.Cmpss,         IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Comisdeq,      new IntrinsicInfo(X86Instruction.Comisd,        IntrinsicType.Comis_));
+            Add(Intrinsic.X86Comisdge,      new IntrinsicInfo(X86Instruction.Comisd,        IntrinsicType.Comis_));
+            Add(Intrinsic.X86Comisdlt,      new IntrinsicInfo(X86Instruction.Comisd,        IntrinsicType.Comis_));
+            Add(Intrinsic.X86Comisseq,      new IntrinsicInfo(X86Instruction.Comiss,        IntrinsicType.Comis_));
+            Add(Intrinsic.X86Comissge,      new IntrinsicInfo(X86Instruction.Comiss,        IntrinsicType.Comis_));
+            Add(Intrinsic.X86Comisslt,      new IntrinsicInfo(X86Instruction.Comiss,        IntrinsicType.Comis_));
+            Add(Intrinsic.X86Crc32,         new IntrinsicInfo(X86Instruction.Crc32,         IntrinsicType.Crc32));
+            Add(Intrinsic.X86Crc32_16,      new IntrinsicInfo(X86Instruction.Crc32_16,      IntrinsicType.Crc32));
+            Add(Intrinsic.X86Crc32_8,       new IntrinsicInfo(X86Instruction.Crc32_8,       IntrinsicType.Crc32));
+            Add(Intrinsic.X86Cvtdq2pd,      new IntrinsicInfo(X86Instruction.Cvtdq2pd,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtdq2ps,      new IntrinsicInfo(X86Instruction.Cvtdq2ps,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtpd2dq,      new IntrinsicInfo(X86Instruction.Cvtpd2dq,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtpd2ps,      new IntrinsicInfo(X86Instruction.Cvtpd2ps,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtps2dq,      new IntrinsicInfo(X86Instruction.Cvtps2dq,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtps2pd,      new IntrinsicInfo(X86Instruction.Cvtps2pd,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtsd2si,      new IntrinsicInfo(X86Instruction.Cvtsd2si,      IntrinsicType.UnaryToGpr));
+            Add(Intrinsic.X86Cvtsd2ss,      new IntrinsicInfo(X86Instruction.Cvtsd2ss,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Cvtsi2sd,      new IntrinsicInfo(X86Instruction.Cvtsi2sd,      IntrinsicType.BinaryGpr));
+            Add(Intrinsic.X86Cvtsi2si,      new IntrinsicInfo(X86Instruction.Movd,          IntrinsicType.UnaryToGpr));
+            Add(Intrinsic.X86Cvtsi2ss,      new IntrinsicInfo(X86Instruction.Cvtsi2ss,      IntrinsicType.BinaryGpr));
+            Add(Intrinsic.X86Cvtss2sd,      new IntrinsicInfo(X86Instruction.Cvtss2sd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Cvtss2si,      new IntrinsicInfo(X86Instruction.Cvtss2si,      IntrinsicType.UnaryToGpr));
+            Add(Intrinsic.X86Divpd,         new IntrinsicInfo(X86Instruction.Divpd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Divps,         new IntrinsicInfo(X86Instruction.Divps,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Divsd,         new IntrinsicInfo(X86Instruction.Divsd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Divss,         new IntrinsicInfo(X86Instruction.Divss,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Gf2p8affineqb, new IntrinsicInfo(X86Instruction.Gf2p8affineqb, IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Haddpd,        new IntrinsicInfo(X86Instruction.Haddpd,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Haddps,        new IntrinsicInfo(X86Instruction.Haddps,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Insertps,      new IntrinsicInfo(X86Instruction.Insertps,      IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Maxpd,         new IntrinsicInfo(X86Instruction.Maxpd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Maxps,         new IntrinsicInfo(X86Instruction.Maxps,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Maxsd,         new IntrinsicInfo(X86Instruction.Maxsd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Maxss,         new IntrinsicInfo(X86Instruction.Maxss,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Minpd,         new IntrinsicInfo(X86Instruction.Minpd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Minps,         new IntrinsicInfo(X86Instruction.Minps,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Minsd,         new IntrinsicInfo(X86Instruction.Minsd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Minss,         new IntrinsicInfo(X86Instruction.Minss,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Movhlps,       new IntrinsicInfo(X86Instruction.Movhlps,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Movlhps,       new IntrinsicInfo(X86Instruction.Movlhps,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Movss,         new IntrinsicInfo(X86Instruction.Movss,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Mulpd,         new IntrinsicInfo(X86Instruction.Mulpd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Mulps,         new IntrinsicInfo(X86Instruction.Mulps,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Mulsd,         new IntrinsicInfo(X86Instruction.Mulsd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Mulss,         new IntrinsicInfo(X86Instruction.Mulss,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Mxcsrmb,       new IntrinsicInfo(X86Instruction.None,          IntrinsicType.Mxcsr)); // Mask bits.
+            Add(Intrinsic.X86Mxcsrub,       new IntrinsicInfo(X86Instruction.None,          IntrinsicType.Mxcsr)); // Unmask bits.
+            Add(Intrinsic.X86Paddb,         new IntrinsicInfo(X86Instruction.Paddb,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Paddd,         new IntrinsicInfo(X86Instruction.Paddd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Paddq,         new IntrinsicInfo(X86Instruction.Paddq,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Paddw,         new IntrinsicInfo(X86Instruction.Paddw,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Palignr,       new IntrinsicInfo(X86Instruction.Palignr,       IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Pand,          new IntrinsicInfo(X86Instruction.Pand,          IntrinsicType.Binary));
+            Add(Intrinsic.X86Pandn,         new IntrinsicInfo(X86Instruction.Pandn,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Pavgb,         new IntrinsicInfo(X86Instruction.Pavgb,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Pavgw,         new IntrinsicInfo(X86Instruction.Pavgw,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Pblendvb,      new IntrinsicInfo(X86Instruction.Pblendvb,      IntrinsicType.Ternary));
+            Add(Intrinsic.X86Pclmulqdq,     new IntrinsicInfo(X86Instruction.Pclmulqdq,     IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Pcmpeqb,       new IntrinsicInfo(X86Instruction.Pcmpeqb,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpeqd,       new IntrinsicInfo(X86Instruction.Pcmpeqd,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpeqq,       new IntrinsicInfo(X86Instruction.Pcmpeqq,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpeqw,       new IntrinsicInfo(X86Instruction.Pcmpeqw,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpgtb,       new IntrinsicInfo(X86Instruction.Pcmpgtb,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpgtd,       new IntrinsicInfo(X86Instruction.Pcmpgtd,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpgtq,       new IntrinsicInfo(X86Instruction.Pcmpgtq,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpgtw,       new IntrinsicInfo(X86Instruction.Pcmpgtw,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxsb,        new IntrinsicInfo(X86Instruction.Pmaxsb,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxsd,        new IntrinsicInfo(X86Instruction.Pmaxsd,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxsw,        new IntrinsicInfo(X86Instruction.Pmaxsw,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxub,        new IntrinsicInfo(X86Instruction.Pmaxub,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxud,        new IntrinsicInfo(X86Instruction.Pmaxud,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxuw,        new IntrinsicInfo(X86Instruction.Pmaxuw,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminsb,        new IntrinsicInfo(X86Instruction.Pminsb,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminsd,        new IntrinsicInfo(X86Instruction.Pminsd,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminsw,        new IntrinsicInfo(X86Instruction.Pminsw,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminub,        new IntrinsicInfo(X86Instruction.Pminub,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminud,        new IntrinsicInfo(X86Instruction.Pminud,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminuw,        new IntrinsicInfo(X86Instruction.Pminuw,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmovsxbw,      new IntrinsicInfo(X86Instruction.Pmovsxbw,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmovsxdq,      new IntrinsicInfo(X86Instruction.Pmovsxdq,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmovsxwd,      new IntrinsicInfo(X86Instruction.Pmovsxwd,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmovzxbw,      new IntrinsicInfo(X86Instruction.Pmovzxbw,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmovzxdq,      new IntrinsicInfo(X86Instruction.Pmovzxdq,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmovzxwd,      new IntrinsicInfo(X86Instruction.Pmovzxwd,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmulld,        new IntrinsicInfo(X86Instruction.Pmulld,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmullw,        new IntrinsicInfo(X86Instruction.Pmullw,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Popcnt,        new IntrinsicInfo(X86Instruction.Popcnt,        IntrinsicType.PopCount));
+            Add(Intrinsic.X86Por,           new IntrinsicInfo(X86Instruction.Por,           IntrinsicType.Binary));
+            Add(Intrinsic.X86Pshufb,        new IntrinsicInfo(X86Instruction.Pshufb,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pshufd,        new IntrinsicInfo(X86Instruction.Pshufd,        IntrinsicType.BinaryImm));
+            Add(Intrinsic.X86Pslld,         new IntrinsicInfo(X86Instruction.Pslld,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Pslldq,        new IntrinsicInfo(X86Instruction.Pslldq,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Psllq,         new IntrinsicInfo(X86Instruction.Psllq,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Psllw,         new IntrinsicInfo(X86Instruction.Psllw,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Psrad,         new IntrinsicInfo(X86Instruction.Psrad,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Psraw,         new IntrinsicInfo(X86Instruction.Psraw,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Psrld,         new IntrinsicInfo(X86Instruction.Psrld,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Psrlq,         new IntrinsicInfo(X86Instruction.Psrlq,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Psrldq,        new IntrinsicInfo(X86Instruction.Psrldq,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Psrlw,         new IntrinsicInfo(X86Instruction.Psrlw,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Psubb,         new IntrinsicInfo(X86Instruction.Psubb,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Psubd,         new IntrinsicInfo(X86Instruction.Psubd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Psubq,         new IntrinsicInfo(X86Instruction.Psubq,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Psubw,         new IntrinsicInfo(X86Instruction.Psubw,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpckhbw,     new IntrinsicInfo(X86Instruction.Punpckhbw,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpckhdq,     new IntrinsicInfo(X86Instruction.Punpckhdq,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpckhqdq,    new IntrinsicInfo(X86Instruction.Punpckhqdq,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpckhwd,     new IntrinsicInfo(X86Instruction.Punpckhwd,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpcklbw,     new IntrinsicInfo(X86Instruction.Punpcklbw,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpckldq,     new IntrinsicInfo(X86Instruction.Punpckldq,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpcklqdq,    new IntrinsicInfo(X86Instruction.Punpcklqdq,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpcklwd,     new IntrinsicInfo(X86Instruction.Punpcklwd,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pxor,          new IntrinsicInfo(X86Instruction.Pxor,          IntrinsicType.Binary));
+            Add(Intrinsic.X86Rcpps,         new IntrinsicInfo(X86Instruction.Rcpps,         IntrinsicType.Unary));
+            Add(Intrinsic.X86Rcpss,         new IntrinsicInfo(X86Instruction.Rcpss,         IntrinsicType.Unary));
+            Add(Intrinsic.X86Roundpd,       new IntrinsicInfo(X86Instruction.Roundpd,       IntrinsicType.BinaryImm));
+            Add(Intrinsic.X86Roundps,       new IntrinsicInfo(X86Instruction.Roundps,       IntrinsicType.BinaryImm));
+            Add(Intrinsic.X86Roundsd,       new IntrinsicInfo(X86Instruction.Roundsd,       IntrinsicType.BinaryImm));
+            Add(Intrinsic.X86Roundss,       new IntrinsicInfo(X86Instruction.Roundss,       IntrinsicType.BinaryImm));
+            Add(Intrinsic.X86Rsqrtps,       new IntrinsicInfo(X86Instruction.Rsqrtps,       IntrinsicType.Unary));
+            Add(Intrinsic.X86Rsqrtss,       new IntrinsicInfo(X86Instruction.Rsqrtss,       IntrinsicType.Unary));
+            Add(Intrinsic.X86Sha256Msg1,    new IntrinsicInfo(X86Instruction.Sha256Msg1,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Sha256Msg2,    new IntrinsicInfo(X86Instruction.Sha256Msg2,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Sha256Rnds2,   new IntrinsicInfo(X86Instruction.Sha256Rnds2,   IntrinsicType.Ternary));
+            Add(Intrinsic.X86Shufpd,        new IntrinsicInfo(X86Instruction.Shufpd,        IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Shufps,        new IntrinsicInfo(X86Instruction.Shufps,        IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Sqrtpd,        new IntrinsicInfo(X86Instruction.Sqrtpd,        IntrinsicType.Unary));
+            Add(Intrinsic.X86Sqrtps,        new IntrinsicInfo(X86Instruction.Sqrtps,        IntrinsicType.Unary));
+            Add(Intrinsic.X86Sqrtsd,        new IntrinsicInfo(X86Instruction.Sqrtsd,        IntrinsicType.Unary));
+            Add(Intrinsic.X86Sqrtss,        new IntrinsicInfo(X86Instruction.Sqrtss,        IntrinsicType.Unary));
+            Add(Intrinsic.X86Subpd,         new IntrinsicInfo(X86Instruction.Subpd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Subps,         new IntrinsicInfo(X86Instruction.Subps,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Subsd,         new IntrinsicInfo(X86Instruction.Subsd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Subss,         new IntrinsicInfo(X86Instruction.Subss,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpckhpd,      new IntrinsicInfo(X86Instruction.Unpckhpd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpckhps,      new IntrinsicInfo(X86Instruction.Unpckhps,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpcklpd,      new IntrinsicInfo(X86Instruction.Unpcklpd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpcklps,      new IntrinsicInfo(X86Instruction.Unpcklps,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Vcvtph2ps,     new IntrinsicInfo(X86Instruction.Vcvtph2ps,     IntrinsicType.Unary));
+            Add(Intrinsic.X86Vcvtps2ph,     new IntrinsicInfo(X86Instruction.Vcvtps2ph,     IntrinsicType.BinaryImm));
+            Add(Intrinsic.X86Vfmadd231ps,   new IntrinsicInfo(X86Instruction.Vfmadd231ps,   IntrinsicType.Fma));
+            Add(Intrinsic.X86Vfmadd231sd,   new IntrinsicInfo(X86Instruction.Vfmadd231sd,   IntrinsicType.Fma));
+            Add(Intrinsic.X86Vfmadd231ss,   new IntrinsicInfo(X86Instruction.Vfmadd231ss,   IntrinsicType.Fma));
+            Add(Intrinsic.X86Vfmsub231sd,   new IntrinsicInfo(X86Instruction.Vfmsub231sd,   IntrinsicType.Fma));
+            Add(Intrinsic.X86Vfmsub231ss,   new IntrinsicInfo(X86Instruction.Vfmsub231ss,   IntrinsicType.Fma));
+            Add(Intrinsic.X86Vfnmadd231ps,  new IntrinsicInfo(X86Instruction.Vfnmadd231ps,  IntrinsicType.Fma));
+            Add(Intrinsic.X86Vfnmadd231sd,  new IntrinsicInfo(X86Instruction.Vfnmadd231sd,  IntrinsicType.Fma));
+            Add(Intrinsic.X86Vfnmadd231ss,  new IntrinsicInfo(X86Instruction.Vfnmadd231ss,  IntrinsicType.Fma));
+            Add(Intrinsic.X86Vfnmsub231sd,  new IntrinsicInfo(X86Instruction.Vfnmsub231sd,  IntrinsicType.Fma));
+            Add(Intrinsic.X86Vfnmsub231ss,  new IntrinsicInfo(X86Instruction.Vfnmsub231ss,  IntrinsicType.Fma));
+            Add(Intrinsic.X86Xorpd,         new IntrinsicInfo(X86Instruction.Xorpd,         IntrinsicType.Binary));
+            Add(Intrinsic.X86Xorps,         new IntrinsicInfo(X86Instruction.Xorps,         IntrinsicType.Binary));
         }
 
         private static void Add(Intrinsic intrin, IntrinsicInfo info)
diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs
index 37926a169b..b024394e16 100644
--- a/ARMeilleure/CodeGen/X86/X86Instruction.cs
+++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -54,6 +54,7 @@ namespace ARMeilleure.CodeGen.X86
         Divps,
         Divsd,
         Divss,
+        Gf2p8affineqb,
         Haddpd,
         Haddps,
         Idiv,
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
index 49c9e68790..49c17560b4 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -243,6 +243,21 @@ namespace ARMeilleure.Instructions
             throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".");
         }
 
+        // Returns the 8x8 bit-matrix that makes gf2p8affineqb shift every byte left by "shift" bits (right when negative).
+        public static ulong X86GetGf2p8LogicalShiftLeft(int shift)
+        {
+            // Identity bit-matrix: one byte per row, 0x01, 0x02, ..., 0x80 from the most to the least significant byte.
+            const ulong identity = 0x0102040810204080UL;
+
+            // 64-bit shift counts are masked to 6 bits, so a whole-byte shift must yield the zero matrix explicitly.
+            if (shift <= -8 || shift >= 8)
+            {
+                return 0UL;
+            }
+
+            return shift >= 0 ? identity >> (shift * 8) : identity << (-shift * 8);
+        }
+
         public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op) // "size" is 8 (SIMD&FP Inst.).
         {
             Debug.Assert(op.Type == OperandType.I32 || op.Type == OperandType.I64);
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
index dbd1a1a00a..624ae841d3 100644
--- a/ARMeilleure/Instructions/InstEmitSimdLogical.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
@@ -336,20 +336,45 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
-            Operand res = context.VectorZero();
-
-            int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
-
-            for (int index = 0; index < elems; index++)
+            if (Optimizations.UseGfni)
             {
-                Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
+                const long bitMatrix =
+                    (0b10000000L << 56) |
+                    (0b01000000L << 48) |
+                    (0b00100000L << 40) |
+                    (0b00010000L << 32) |
+                    (0b00001000L << 24) |
+                    (0b00000100L << 16) |
+                    (0b00000010L <<  8) |
+                    (0b00000001L <<  0);
 
-                Operand de = EmitReverseBits8Op(context, ne);
+                Operand vBitMatrix = X86GetAllElements(context, bitMatrix);
 
-                res = EmitVectorInsert(context, res, de, index, 0);
+                Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, GetVec(op.Rn), vBitMatrix, Const(0));
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
             }
+            else
+            {
+                Operand res = context.VectorZero();
+                int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
 
-            context.Copy(GetVec(op.Rd), res);
+                for (int index = 0; index < elems; index++)
+                {
+                    Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
+
+                    Operand de = EmitReverseBits8Op(context, ne);
+
+                    res = EmitVectorInsert(context, res, de, index, 0);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
         }
 
         private static Operand EmitReverseBits8Op(ArmEmitterContext context, Operand op)
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs
index 146aeafa73..cf3b51bd6b 100644
--- a/ARMeilleure/Instructions/InstEmitSimdShift.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs
@@ -88,8 +88,35 @@ namespace ARMeilleure.Instructions
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
             int shift = GetImmShl(op);
+            int eSize = 8 << op.Size;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (shift >= eSize)
+            {
+                if ((op.RegisterSize == RegisterSize.Simd64))
+                {
+                    Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+                    context.Copy(GetVec(op.Rd), res);
+                }
+            }
+            else if (Optimizations.UseGfni && op.Size == 0)
+            {
+                Operand n = GetVec(op.Rn);
+
+                ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);
+
+                Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -396,10 +423,40 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
-            {
-                int shift = GetImmShr(op);
+            int shift = GetImmShr(op);
 
+            if (Optimizations.UseGfni && op.Size == 0)
+            {
+                Operand n = GetVec(op.Rn);
+
+                ulong bitMatrix;
+
+                if (shift < 8)
+                {
+                    bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);
+
+                    // Extend sign-bit
+                    bitMatrix |= 0x8080808080808080UL >> (64 - shift * 8);
+                }
+                else
+                {
+                    // Replicate sign-bit into all bits
+                    bitMatrix = 0x8080808080808080UL;
+                }
+
+                Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            {
                 Operand n = GetVec(op.Rn);
 
                 Intrinsic sraInst = X86PsraInstruction[op.Size];
@@ -929,10 +986,44 @@ namespace ARMeilleure.Instructions
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
             int shift = GetImmShl(op);
+            int eSize = 8 << op.Size;
 
             ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (shift >= eSize)
+            {
+                if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+                {
+                    Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+                    context.Copy(GetVec(op.Rd), res);
+                }
+            }
+            else if (Optimizations.UseGfni && op.Size == 0)
+            {
+                Operand d = GetVec(op.Rd);
+                Operand n = GetVec(op.Rn);
+
+                ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);
+
+                Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+                Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+                Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+                Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+                if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(d, res);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 Operand d = GetVec(op.Rd);
                 Operand n = GetVec(op.Rn);
@@ -988,7 +1079,40 @@ namespace ARMeilleure.Instructions
 
             ulong mask = (ulong.MaxValue << (eSize - shift)) & (ulong.MaxValue >> (64 - eSize));
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (shift >= eSize)
+            {
+                if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+                {
+                    Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+                    context.Copy(GetVec(op.Rd), res);
+                }
+            }
+            else if (Optimizations.UseGfni && op.Size == 0)
+            {
+                Operand d = GetVec(op.Rd);
+                Operand n = GetVec(op.Rn);
+
+                ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);
+
+                Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+                Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+                Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+                Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+                if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(d, res);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 Operand d = GetVec(op.Rd);
                 Operand n = GetVec(op.Rn);
diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
index 71bfc3bec2..bc1285be27 100644
--- a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
+++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
@@ -47,6 +47,7 @@ namespace ARMeilleure.IntermediateRepresentation
         X86Divps,
         X86Divsd,
         X86Divss,
+        X86Gf2p8affineqb,
         X86Haddpd,
         X86Haddps,
         X86Insertps,
diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs
index 19193971cc..97defd9a95 100644
--- a/ARMeilleure/Optimizations.cs
+++ b/ARMeilleure/Optimizations.cs
@@ -22,6 +22,7 @@ namespace ARMeilleure
         public static bool UseAesniIfAvailable     { get; set; } = true;
         public static bool UsePclmulqdqIfAvailable { get; set; } = true;
         public static bool UseShaIfAvailable       { get; set; } = true;
+        public static bool UseGfniIfAvailable      { get; set; } = true;
 
         public static bool ForceLegacySse
         {
@@ -42,5 +43,6 @@ namespace ARMeilleure
         internal static bool UseAesni     => UseAesniIfAvailable     && HardwareCapabilities.SupportsAesni;
         internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && HardwareCapabilities.SupportsPclmulqdq;
         internal static bool UseSha       => UseShaIfAvailable       && HardwareCapabilities.SupportsSha;
+        internal static bool UseGfni      => UseGfniIfAvailable      && HardwareCapabilities.SupportsGfni;
     }
 }
\ No newline at end of file
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index f4ae411b7c..1515713be5 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
         private const string OuterHeaderMagicString = "PTCohd\0\0";
         private const string InnerHeaderMagicString = "PTCihd\0\0";
 
-        private const uint InternalVersion = 3703; //! To be incremented manually for each change to the ARMeilleure project.
+        private const uint InternalVersion = 3710; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string ActualDir = "0";
         private const string BackupDir = "1";
@@ -951,7 +951,8 @@ namespace ARMeilleure.Translation.PTC
             return new FeatureInfo(
                 (uint)HardwareCapabilities.FeatureInfo1Ecx,
                 (uint)HardwareCapabilities.FeatureInfo1Edx,
-                (uint)HardwareCapabilities.FeatureInfo7Ebx);
+                (uint)HardwareCapabilities.FeatureInfo7Ebx,
+                (uint)HardwareCapabilities.FeatureInfo7Ecx);
         }
 
         private static byte GetMemoryManagerMode()
@@ -971,7 +972,7 @@ namespace ARMeilleure.Translation.PTC
             return osPlatform;
         }
 
-        [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 54*/)]
+        [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 58*/)]
         private struct OuterHeader
         {
             public ulong Magic;
@@ -1002,8 +1003,8 @@ namespace ARMeilleure.Translation.PTC
             }
         }
 
-        [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 12*/)]
-        private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2);
+        [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 16*/)]
+        private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2, uint FeatureInfo3);
 
         [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
         private struct InnerHeader