diff --git a/ARMeilleure/ARMeilleure.csproj b/ARMeilleure/ARMeilleure.csproj index 0d4574afd9..bb3f47219a 100644 --- a/ARMeilleure/ARMeilleure.csproj +++ b/ARMeilleure/ARMeilleure.csproj @@ -7,7 +7,6 @@ - diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index 123c80fb1f..c15deadc53 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -1033,13 +1033,7 @@ namespace ARMeilleure.CodeGen.X86 Debug.Assert(opCode != BadOp, "Invalid opcode value."); - if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding) - { - WriteEvexInst(dest, src1, src2, type, flags, opCode); - - opCode &= 0xff; - } - else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) + if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) { // In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits. @@ -1158,103 +1152,6 @@ namespace ARMeilleure.CodeGen.X86 } } - private void WriteEvexInst( - Operand dest, - Operand src1, - Operand src2, - OperandType type, - InstructionFlags flags, - int opCode, - bool broadcast = false, - int registerWidth = 128, - int maskRegisterIdx = 0, - bool zeroElements = false) - { - int destIdx = dest.GetRegister().Index; - int src1Idx = src1.GetRegister().Index; - int src2Idx = src2.GetRegister().Index; - - WriteByte(0x62); - - // P0 - // Extend dest register - bool r = (destIdx & 8) == 0; - // Extend src register - bool x = (src1Idx & 16) == 0; - // Extend src register - bool b = (src1Idx & 8) == 0; - // Extend dest register - bool rp = (destIdx & 16) == 0; - // Escape code index - byte mm = 0b00; - - switch ((ushort)(opCode >> 8)) - { - case 0xf00: mm = 0b01; break; - case 0xf38: mm = 0b10; break; - case 0xf3a: mm = 0b11; break; - - default: Debug.Assert(false, $"Failed to EVEX encode opcode 0x{opCode:X}."); break; - } - - WriteByte( - (byte)( - (r ? 0x80 : 0) | - (x ? 0x40 : 0) | - (b ? 0x20 : 0) | - (rp ? 0x10 : 0) | - mm)); - - // P1 - // Specify 64-bit lane mode - bool w = Is64Bits(type); - // Src2 register index - byte vvvv = (byte)(src2Idx & 0b1111); - // Opcode prefix - byte pp = (flags & InstructionFlags.PrefixMask) switch - { - InstructionFlags.Prefix66 => 0b01, - InstructionFlags.PrefixF3 => 0b10, - InstructionFlags.PrefixF2 => 0b11, - _ => 0 - }; - WriteByte( - (byte)( - (w ? 0x80 : 0) | - (vvvv << 3) | - 0b100 | - pp)); - - // P2 - // Mask register determines what elements to zero, rather than what elements to merge - bool z = zeroElements; - // Specifies register-width - byte ll = 0b00; - switch (registerWidth) - { - case 128: ll = 0b00; break; - case 256: ll = 0b01; break; - case 512: ll = 0b10; break; - - default: Debug.Assert(false, $"Invalid EVEX vector register width {registerWidth}."); break; - } - // Embedded broadcast in the case of a memory operand - bool bcast = broadcast; - // Extend src2 register - bool vp = (src2Idx & 16) == 0; - // Mask register index - Debug.Assert(maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}."); - byte aaa = (byte)(maskRegisterIdx & 0b111); - - WriteByte( - (byte)( - (z ? 0x80 : 0) | - (ll << 5) | - (bcast ? 0x10 : 0) | - (vp ? 8 : 0) | - aaa)); - } - private void WriteCompactInst(Operand operand, int opCode) { int regIndex = operand.GetRegister().Index; diff --git a/ARMeilleure/CodeGen/X86/AssemblerTable.cs b/ARMeilleure/CodeGen/X86/AssemblerTable.cs index b47b3ecd1a..ecdc029f96 100644 --- a/ARMeilleure/CodeGen/X86/AssemblerTable.cs +++ b/ARMeilleure/CodeGen/X86/AssemblerTable.cs @@ -20,7 +20,6 @@ namespace ARMeilleure.CodeGen.X86 Reg8Dest = 1 << 2, RexW = 1 << 3, Vex = 1 << 4, - Evex = 1 << 5, PrefixBit = 16, PrefixMask = 7 << PrefixBit, @@ -279,7 +278,6 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); - Add(X86Instruction.Vpternlogd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a25, InstructionFlags.Evex | InstructionFlags.Prefix66)); Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex)); diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs index 63a9e46a24..c12a4e28b7 100644 --- a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs +++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs @@ -1,14 +1,10 @@ -using Ryujinx.Memory; using System; -using System.Runtime.InteropServices; using System.Runtime.Intrinsics.X86; namespace ARMeilleure.CodeGen.X86 { static class HardwareCapabilities { - private delegate uint GetXcr0(); - static HardwareCapabilities() { if (!X86Base.IsSupported) @@ -28,28 +24,6 @@ namespace ARMeilleure.CodeGen.X86 FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7; FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7; } - - Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax(); - } - - private static uint GetXcr0Eax() - { - ReadOnlySpan asmGetXcr0 = new byte[] - { - 0x31, 0xc9, // xor ecx, ecx - 0xf, 0x01, 0xd0, // xgetbv - 0xc3, // ret - }; - - using MemoryBlock memGetXcr0 = new MemoryBlock((ulong)asmGetXcr0.Length); - - memGetXcr0.Write(0, asmGetXcr0); - - memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute); - - var fGetXcr0 = Marshal.GetDelegateForFunctionPointer(memGetXcr0.Pointer); - - return fGetXcr0(); } [Flags] @@ -70,7 +44,6 @@ namespace ARMeilleure.CodeGen.X86 Sse42 = 1 << 20, Popcnt = 1 << 23, Aes = 1 << 25, - Osxsave = 1 << 27, Avx = 1 << 28, F16c = 1 << 29 } @@ -79,11 +52,7 @@ namespace ARMeilleure.CodeGen.X86 public enum FeatureFlags7Ebx { Avx2 = 1 << 5, - Avx512f = 1 << 16, - Avx512dq = 1 << 17, - Sha = 1 << 29, - Avx512bw = 1 << 30, - Avx512vl = 1 << 31 + Sha = 1 << 29 } [Flags] @@ -92,21 +61,10 @@ namespace ARMeilleure.CodeGen.X86 Gfni = 1 << 8, } - [Flags] - public enum Xcr0FlagsEax - { - Sse = 1 << 1, - YmmHi128 = 1 << 2, - Opmask = 1 << 5, - ZmmHi256 = 1 << 6, - Hi16Zmm = 1 << 7 - } - public static FeatureFlags1Edx FeatureInfo1Edx { get; } public static FeatureFlags1Ecx FeatureInfo1Ecx { get; } public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0; public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0; - public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0; public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse); public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2); @@ -118,13 +76,8 @@ namespace ARMeilleure.CodeGen.X86 public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42); public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt); public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes); - public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128); + public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx); public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx; - public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Osxsave) - && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm); - public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F; - public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F; - public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F; public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c); public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha); public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni); @@ -132,6 +85,5 @@ namespace ARMeilleure.CodeGen.X86 public static bool ForceLegacySse { get; set; } public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse; - public static bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse; } } \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs index f6b34e29fd..6407a9a7b4 100644 --- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs +++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs @@ -182,7 +182,6 @@ namespace ARMeilleure.CodeGen.X86 Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma)); Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma)); Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma)); - Add(Intrinsic.X86Vpternlogd, new IntrinsicInfo(X86Instruction.Vpternlogd, IntrinsicType.TernaryImm)); Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary)); Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary)); } diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index ecfc432d70..b024394e16 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -219,7 +219,6 @@ namespace ARMeilleure.CodeGen.X86 Vfnmsub231sd, Vfnmsub231ss, Vpblendvb, - Vpternlogd, Xor, Xorpd, Xorps, diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs index 171aec7e0e..624ae841d3 100644 --- a/ARMeilleure/Instructions/InstEmitSimdLogical.cs +++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs @@ -224,22 +224,7 @@ namespace ARMeilleure.Instructions public static void Not_V(ArmEmitterContext context) { - if (Optimizations.UseAvx512Ortho) - { - OpCodeSimd op = (OpCodeSimd)context.CurrOp; - - Operand n = GetVec(op.Rn); - - Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, n, Const(0b01010101)); - - if (op.RegisterSize == RegisterSize.Simd64) - { - res = context.VectorZeroUpper64(res); - } - - context.Copy(GetVec(op.Rd), res); - } - else if (Optimizations.UseSse2) + if (Optimizations.UseSse2) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs index aeebf3027b..bc1285be27 100644 --- a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs +++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs @@ -171,7 +171,6 @@ namespace ARMeilleure.IntermediateRepresentation X86Vfnmadd231ss, X86Vfnmsub231sd, X86Vfnmsub231ss, - X86Vpternlogd, X86Xorpd, X86Xorps } diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs index e8edcd768f..97defd9a95 100644 --- a/ARMeilleure/Optimizations.cs +++ b/ARMeilleure/Optimizations.cs @@ -17,10 +17,6 @@ namespace ARMeilleure public static bool UseSse42IfAvailable { get; set; } = true; public static bool UsePopCntIfAvailable { get; set; } = true; public static bool UseAvxIfAvailable { get; set; } = true; - public static bool UseAvx512FIfAvailable { get; set; } = true; - public static bool UseAvx512VlIfAvailable { get; set; } = true; - public static bool UseAvx512BwIfAvailable { get; set; } = true; - public static bool UseAvx512DqIfAvailable { get; set; } = true; public static bool UseF16cIfAvailable { get; set; } = true; public static bool UseFmaIfAvailable { get; set; } = true; public static bool UseAesniIfAvailable { get; set; } = true; @@ -42,18 +38,11 @@ namespace ARMeilleure internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42; internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt; internal static bool UseAvx => UseAvxIfAvailable && HardwareCapabilities.SupportsAvx && !ForceLegacySse; - internal static bool UseAvx512F => UseAvx512FIfAvailable && HardwareCapabilities.SupportsAvx512F && !ForceLegacySse; - internal static bool UseAvx512Vl => UseAvx512VlIfAvailable && HardwareCapabilities.SupportsAvx512Vl && !ForceLegacySse; - internal static bool UseAvx512Bw => UseAvx512BwIfAvailable && HardwareCapabilities.SupportsAvx512Bw && !ForceLegacySse; - internal static bool UseAvx512Dq => UseAvx512DqIfAvailable && HardwareCapabilities.SupportsAvx512Dq && !ForceLegacySse; internal static bool UseF16c => UseF16cIfAvailable && HardwareCapabilities.SupportsF16c; internal static bool UseFma => UseFmaIfAvailable && HardwareCapabilities.SupportsFma; internal static bool UseAesni => UseAesniIfAvailable && HardwareCapabilities.SupportsAesni; internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && HardwareCapabilities.SupportsPclmulqdq; internal static bool UseSha => UseShaIfAvailable && HardwareCapabilities.SupportsSha; internal static bool UseGfni => UseGfniIfAvailable && HardwareCapabilities.SupportsGfni; - - internal static bool UseAvx512Ortho => UseAvx512F && UseAvx512Vl; - internal static bool UseAvx512OrthoFloat => UseAvx512Ortho && UseAvx512Dq; } } \ No newline at end of file diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 22e56a1d26..70f6e0127b 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 3714; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 3713; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; @@ -952,8 +952,7 @@ namespace ARMeilleure.Translation.PTC (uint)HardwareCapabilities.FeatureInfo1Ecx, (uint)HardwareCapabilities.FeatureInfo1Edx, (uint)HardwareCapabilities.FeatureInfo7Ebx, - (uint)HardwareCapabilities.FeatureInfo7Ecx, - (uint)HardwareCapabilities.Xcr0InfoEax); + (uint)HardwareCapabilities.FeatureInfo7Ecx); } private static byte GetMemoryManagerMode() @@ -973,7 +972,7 @@ namespace ARMeilleure.Translation.PTC return osPlatform; } - [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 62*/)] + [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 58*/)] private struct OuterHeader { public ulong Magic; @@ -1004,8 +1003,8 @@ namespace ARMeilleure.Translation.PTC } } - [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 20*/)] - private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2, uint FeatureInfo3, uint FeatureInfo4); + [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 16*/)] + private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2, uint FeatureInfo3); [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)] private struct InnerHeader