From 1c44d9f66da86daa84f31c891f316ee373a2d312 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 18 Feb 2018 01:57:33 -0300 Subject: [PATCH] Fix for some SIMD issues --- Ryujinx/Cpu/AOpCodeTable.cs | 2 +- Ryujinx/Cpu/ATranslator.cs | 4 +- Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs | 14 ++--- Ryujinx/Cpu/Instruction/AInstEmitSimdCvt.cs | 10 +++ .../Cpu/Instruction/AInstEmitSimdHelper.cs | 63 +++++++++++++++---- Ryujinx/Cpu/Instruction/AInstEmitSimdMove.cs | 26 +++++++- Ryujinx/Cpu/Memory/AMemory.cs | 19 ++++++ Ryujinx/Cpu/Translation/AILBlock.cs | 4 +- Ryujinx/Cpu/Translation/AILOpCodeLoad.cs | 2 +- Ryujinx/Cpu/Translation/AILOpCodeStore.cs | 2 +- Ryujinx/Cpu/Translation/AIoType.cs | 5 +- Ryujinx/Ryujinx.conf | 8 +-- Ryujinx/Ui/GLScreen.cs | 1 - 13 files changed, 124 insertions(+), 36 deletions(-) diff --git a/Ryujinx/Cpu/AOpCodeTable.cs b/Ryujinx/Cpu/AOpCodeTable.cs index cfed011e81..a3a8477eb7 100644 --- a/Ryujinx/Cpu/AOpCodeTable.cs +++ b/Ryujinx/Cpu/AOpCodeTable.cs @@ -139,8 +139,8 @@ namespace ChocolArm64 Set("0>101110<<100000100110xxxxxxxxxx", AInstEmit.Cmle_V, typeof(AOpCodeSimd)); Set("0>001110<<100000101010xxxxxxxxxx", AInstEmit.Cmlt_V, typeof(AOpCodeSimd)); Set("0x00111000100000010110xxxxxxxxxx", AInstEmit.Cnt_V, typeof(AOpCodeSimd)); - Set("01011110000xxxxx000001xxxxxxxxxx", AInstEmit.Dup_S, typeof(AOpCodeSimdIns)); Set("0x001110000xxxxx000011xxxxxxxxxx", AInstEmit.Dup_Gp, typeof(AOpCodeSimdIns)); + Set("01011110000xxxxx000001xxxxxxxxxx", AInstEmit.Dup_S, typeof(AOpCodeSimdIns)); Set("0x001110000xxxxx000001xxxxxxxxxx", AInstEmit.Dup_V, typeof(AOpCodeSimdIns)); Set("0x101110001xxxxx000111xxxxxxxxxx", AInstEmit.Eor_V, typeof(AOpCodeSimdReg)); Set("00011110xx100000110000xxxxxxxxxx", AInstEmit.Fabs_S, typeof(AOpCodeSimd)); diff --git a/Ryujinx/Cpu/ATranslator.cs b/Ryujinx/Cpu/ATranslator.cs index ba7f3df6fa..74bb9bb799 100644 --- a/Ryujinx/Cpu/ATranslator.cs +++ b/Ryujinx/Cpu/ATranslator.cs @@ -8,10 +8,10 @@ namespace ChocolArm64 { class ATranslator { - private Dictionary CachedSubs; - public AThread Thread { get; private set; } + private Dictionary CachedSubs; + private bool KeepRunning; public ATranslator(AThread Parent) diff --git a/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs b/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs index 52b8e47086..be4a8cd98b 100644 --- a/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs +++ b/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs @@ -27,11 +27,9 @@ namespace ChocolArm64.Decoder switch (Scale) { - case 0: Index >>= 0; break; - case 1: { - if ((Index & 1) != 0) + if ((Size & 1) != 0) { Inst = AInst.Undefined; @@ -45,23 +43,23 @@ namespace ChocolArm64.Decoder case 2: { - if ((Index & 2) != 0 || - ((Index & 1) != 0 && S != 0)) + if ((Size & 2) != 0 || + ((Size & 1) != 0 && S != 0)) { Inst = AInst.Undefined; return; } - if ((Index & 1) != 0) + if ((Size & 1) != 0) { Index >>= 3; + + Scale = 3; } else { Index >>= 2; - - Scale = 3; } break; diff --git a/Ryujinx/Cpu/Instruction/AInstEmitSimdCvt.cs b/Ryujinx/Cpu/Instruction/AInstEmitSimdCvt.cs index 68f23914f8..fbb0dfda5a 100644 --- a/Ryujinx/Cpu/Instruction/AInstEmitSimdCvt.cs +++ b/Ryujinx/Cpu/Instruction/AInstEmitSimdCvt.cs @@ -246,6 +246,11 @@ namespace ChocolArm64.Instruction EmitScalarFcvtu(Context, Op.Size, Op.FBits); } + if (Context.CurrOp.RegisterSize == ARegisterSize.Int32) + { + Context.Emit(OpCodes.Conv_U8); + } + Context.EmitStintzr(Op.Rd); } @@ -314,6 +319,11 @@ namespace ChocolArm64.Instruction : nameof(ASoftFallback.SatF64ToU64)); } + if (SizeF == 0) + { + Context.Emit(OpCodes.Conv_U8); + } + EmitVectorInsert(Context, Op.Rd, Index, SizeI); } diff --git a/Ryujinx/Cpu/Instruction/AInstEmitSimdHelper.cs b/Ryujinx/Cpu/Instruction/AInstEmitSimdHelper.cs index fe5290237d..e4cdc9c5e0 100644 --- a/Ryujinx/Cpu/Instruction/AInstEmitSimdHelper.cs +++ b/Ryujinx/Cpu/Instruction/AInstEmitSimdHelper.cs @@ -210,17 +210,17 @@ namespace ChocolArm64.Instruction { AOpCodeSimdRegElem Op = (AOpCodeSimdRegElem)Context.CurrOp; - EmitVectorOpF(Context, Emit, OperFlags.RnRm, Op.Index); + EmitVectorOpByElemF(Context, Emit, Op.Index); } public static void EmitVectorTernaryOpByElemF(AILEmitterCtx Context, Action Emit) { AOpCodeSimdRegElem Op = (AOpCodeSimdRegElem)Context.CurrOp; - EmitVectorOpF(Context, Emit, OperFlags.RdRnRm, Op.Index); + EmitVectorOpByElemF(Context, Emit, Op.Index); } - public static void EmitVectorOpF(AILEmitterCtx Context, Action Emit, OperFlags Opers, int Elem = -1) + public static void EmitVectorOpF(AILEmitterCtx Context, Action Emit, OperFlags Opers) { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; @@ -242,14 +242,7 @@ namespace ChocolArm64.Instruction if (Opers.HasFlag(OperFlags.Rm)) { - if (Elem != -1) - { - EmitVectorExtractF(Context, Op.Rm, Elem, SizeF); - } - else - { - EmitVectorExtractF(Context, Op.Rm, Index, SizeF); - } + EmitVectorExtractF(Context, Op.Rm, Index, SizeF); } Emit(); @@ -263,6 +256,33 @@ namespace ChocolArm64.Instruction } } + public static void EmitVectorOpByElemF(AILEmitterCtx Context, Action Emit, int Elem) + { + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + int SizeF = Op.Size & 1; + + int Bytes = Context.CurrOp.GetBitsCount() >> 3; + + for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++) + { + EmitVectorExtractF(Context, Op.Rn, Index, SizeF); + EmitVectorExtractF(Context, Op.Rm, Elem, SizeF); + + Emit(); + + EmitVectorInsertTmpF(Context, Index, SizeF); + } + + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + public static void EmitVectorUnaryOpSx(AILEmitterCtx Context, Action Emit) { EmitVectorOp(Context, Emit, OperFlags.Rn, true); @@ -534,5 +554,26 @@ namespace ChocolArm64.Instruction Context.EmitStvec(Reg); } + + public static void EmitVectorInsertTmpF(AILEmitterCtx Context, int Index, int Size) + { + Context.EmitLdvectmp(); + Context.EmitLdc_I4(Index); + + if (Size == 0) + { + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle)); + } + else if (Size == 1) + { + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble)); + } + else + { + throw new ArgumentOutOfRangeException(nameof(Size)); + } + + Context.EmitStvectmp(); + } } } \ No newline at end of file diff --git a/Ryujinx/Cpu/Instruction/AInstEmitSimdMove.cs b/Ryujinx/Cpu/Instruction/AInstEmitSimdMove.cs index 87d57e962b..c8f690328f 100644 --- a/Ryujinx/Cpu/Instruction/AInstEmitSimdMove.cs +++ b/Ryujinx/Cpu/Instruction/AInstEmitSimdMove.cs @@ -85,6 +85,8 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, Op.Rn, 0, 3); + EmitIntZeroHigherIfNeeded(Context); + Context.EmitStintzr(Op.Rd); } @@ -94,6 +96,8 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, Op.Rn, 1, 3); + EmitIntZeroHigherIfNeeded(Context); + Context.EmitStintzr(Op.Rd); } @@ -103,6 +107,8 @@ namespace ChocolArm64.Instruction Context.EmitLdintzr(Op.Rn); + EmitIntZeroHigherIfNeeded(Context); + EmitScalarSet(Context, Op.Rd, 3); } @@ -112,6 +118,8 @@ namespace ChocolArm64.Instruction Context.EmitLdintzr(Op.Rn); + EmitIntZeroHigherIfNeeded(Context); + EmitVectorInsert(Context, Op.Rd, 1, 3); } @@ -137,12 +145,19 @@ namespace ChocolArm64.Instruction { AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp; - for (int Index = 0; Index < (4 >> Op.Size); Index++) + int Elems = Op.RegisterSize == ARegisterSize.SIMD128 ? 4 : 2; + + for (int Index = 0; Index < (Elems >> Op.Size); Index++) { Context.EmitLdc_I8(Op.Imm); EmitVectorInsert(Context, Op.Rd, Index, Op.Size + 2); } + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } } public static void Ins_Gp(AILEmitterCtx Context) @@ -248,6 +263,15 @@ namespace ChocolArm64.Instruction } } + private static void EmitIntZeroHigherIfNeeded(AILEmitterCtx Context) + { + if (Context.CurrOp.RegisterSize == ARegisterSize.Int32) + { + Context.Emit(OpCodes.Conv_U4); + Context.Emit(OpCodes.Conv_U8); + } + } + private static void EmitVectorUnzip(AILEmitterCtx Context, int Part) { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; diff --git a/Ryujinx/Cpu/Memory/AMemory.cs b/Ryujinx/Cpu/Memory/AMemory.cs index 52f76e2ec5..158765b87e 100644 --- a/Ryujinx/Cpu/Memory/AMemory.cs +++ b/Ryujinx/Cpu/Memory/AMemory.cs @@ -2,6 +2,7 @@ using ChocolArm64.Exceptions; using ChocolArm64.State; using System; using System.Collections.Generic; +using System.Runtime.CompilerServices; namespace ChocolArm64.Memory { @@ -138,6 +139,7 @@ namespace ChocolArm64.Memory public int ReadInt32(long Position) => (int)ReadUInt32(Position); public long ReadInt64(long Position) => (long)ReadUInt64(Position); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public byte ReadByte(long Position) { #if DEBUG @@ -147,6 +149,7 @@ namespace ChocolArm64.Memory return *((byte*)(RamPtr + (uint)Position)); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public ushort ReadUInt16(long Position) { #if DEBUG @@ -156,6 +159,7 @@ namespace ChocolArm64.Memory return *((ushort*)(RamPtr + (uint)Position)); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint ReadUInt32(long Position) { #if DEBUG @@ -165,6 +169,7 @@ namespace ChocolArm64.Memory return *((uint*)(RamPtr + (uint)Position)); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public ulong ReadUInt64(long Position) { #if DEBUG @@ -174,6 +179,7 @@ namespace ChocolArm64.Memory return *((ulong*)(RamPtr + (uint)Position)); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public AVec ReadVector8(long Position) { #if DEBUG @@ -183,6 +189,7 @@ namespace ChocolArm64.Memory return new AVec() { B0 = ReadByte(Position) }; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public AVec ReadVector16(long Position) { #if DEBUG @@ -192,6 +199,7 @@ namespace ChocolArm64.Memory return new AVec() { H0 = ReadUInt16(Position) }; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public AVec ReadVector32(long Position) { #if DEBUG @@ -201,6 +209,7 @@ namespace ChocolArm64.Memory return new AVec() { W0 = ReadUInt32(Position) }; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public AVec ReadVector64(long Position) { #if DEBUG @@ -210,6 +219,7 @@ namespace ChocolArm64.Memory return new AVec() { X0 = ReadUInt64(Position) }; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public AVec ReadVector128(long Position) { #if DEBUG @@ -228,6 +238,7 @@ namespace ChocolArm64.Memory public void WriteInt32(long Position, int Value) => WriteUInt32(Position, (uint)Value); public void WriteInt64(long Position, long Value) => WriteUInt64(Position, (ulong)Value); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void WriteByte(long Position, byte Value) { #if DEBUG @@ -237,6 +248,7 @@ namespace ChocolArm64.Memory *((byte*)(RamPtr + (uint)Position)) = Value; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void WriteUInt16(long Position, ushort Value) { #if DEBUG @@ -246,6 +258,7 @@ namespace ChocolArm64.Memory *((ushort*)(RamPtr + (uint)Position)) = Value; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void WriteUInt32(long Position, uint Value) { #if DEBUG @@ -255,6 +268,7 @@ namespace ChocolArm64.Memory *((uint*)(RamPtr + (uint)Position)) = Value; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void WriteUInt64(long Position, ulong Value) { #if DEBUG @@ -264,6 +278,7 @@ namespace ChocolArm64.Memory *((ulong*)(RamPtr + (uint)Position)) = Value; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void WriteVector8(long Position, AVec Value) { #if DEBUG @@ -273,6 +288,7 @@ namespace ChocolArm64.Memory WriteByte(Position, Value.B0); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void WriteVector16(long Position, AVec Value) { #if DEBUG @@ -282,6 +298,7 @@ namespace ChocolArm64.Memory WriteUInt16(Position, Value.H0); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void WriteVector32(long Position, AVec Value) { #if DEBUG @@ -291,6 +308,7 @@ namespace ChocolArm64.Memory WriteUInt32(Position, Value.W0); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void WriteVector64(long Position, AVec Value) { #if DEBUG @@ -300,6 +318,7 @@ namespace ChocolArm64.Memory WriteUInt64(Position, Value.X0); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void WriteVector128(long Position, AVec Value) { #if DEBUG diff --git a/Ryujinx/Cpu/Translation/AILBlock.cs b/Ryujinx/Cpu/Translation/AILBlock.cs index 2746e4288d..bed195aaf0 100644 --- a/Ryujinx/Cpu/Translation/AILBlock.cs +++ b/Ryujinx/Cpu/Translation/AILBlock.cs @@ -26,7 +26,7 @@ namespace ChocolArm64.Translation { if (ILEmitter is AILOpCodeLoad Ld && AILEmitter.IsRegIndex(Ld.Index)) { - switch (Ld.IoType & AIoType.Mask) + switch (Ld.IoType) { case AIoType.Flag: IntInputs |= ((1L << Ld.Index) << 32) & ~IntOutputs; break; case AIoType.Int: IntInputs |= (1L << Ld.Index) & ~IntOutputs; break; @@ -37,7 +37,7 @@ namespace ChocolArm64.Translation { if (AILEmitter.IsRegIndex(St.Index)) { - switch (St.IoType & AIoType.Mask) + switch (St.IoType) { case AIoType.Flag: IntOutputs |= (1L << St.Index) << 32; break; case AIoType.Int: IntOutputs |= 1L << St.Index; break; diff --git a/Ryujinx/Cpu/Translation/AILOpCodeLoad.cs b/Ryujinx/Cpu/Translation/AILOpCodeLoad.cs index 23ea0d07bf..32da2cdb8b 100644 --- a/Ryujinx/Cpu/Translation/AILOpCodeLoad.cs +++ b/Ryujinx/Cpu/Translation/AILOpCodeLoad.cs @@ -22,7 +22,7 @@ namespace ChocolArm64.Translation public void Emit(AILEmitter Context) { - switch (IoType & AIoType.Mask) + switch (IoType) { case AIoType.Arg: Context.Generator.EmitLdarg(Index); break; diff --git a/Ryujinx/Cpu/Translation/AILOpCodeStore.cs b/Ryujinx/Cpu/Translation/AILOpCodeStore.cs index 87d3f85a94..d14dce5bfe 100644 --- a/Ryujinx/Cpu/Translation/AILOpCodeStore.cs +++ b/Ryujinx/Cpu/Translation/AILOpCodeStore.cs @@ -22,7 +22,7 @@ namespace ChocolArm64.Translation public void Emit(AILEmitter Context) { - switch (IoType & AIoType.Mask) + switch (IoType) { case AIoType.Arg: Context.Generator.EmitStarg(Index); break; diff --git a/Ryujinx/Cpu/Translation/AIoType.cs b/Ryujinx/Cpu/Translation/AIoType.cs index 34aa224e5f..94f8908142 100644 --- a/Ryujinx/Cpu/Translation/AIoType.cs +++ b/Ryujinx/Cpu/Translation/AIoType.cs @@ -10,9 +10,6 @@ namespace ChocolArm64.Translation Flag, Int, Float, - Vector, - Mask = 0xff, - VectorI = Vector | 1 << 8, - VectorF = Vector | 1 << 9 + Vector } } \ No newline at end of file diff --git a/Ryujinx/Ryujinx.conf b/Ryujinx/Ryujinx.conf index 0c6727f6bb..8f791df311 100644 --- a/Ryujinx/Ryujinx.conf +++ b/Ryujinx/Ryujinx.conf @@ -25,10 +25,10 @@ Controls_Left_FakeJoycon_Stick_Down = 93 Controls_Left_FakeJoycon_Stick_Left = 92 Controls_Left_FakeJoycon_Stick_Right = 94 Controls_Left_FakeJoycon_Stick_Button = 0 -Controls_Left_FakeJoycon_DPad_Up = 0 -Controls_Left_FakeJoycon_DPad_Down = 0 -Controls_Left_FakeJoycon_DPad_Left = 0 -Controls_Left_FakeJoycon_DPad_Right = 0 +Controls_Left_FakeJoycon_DPad_Up = 45 +Controls_Left_FakeJoycon_DPad_Down = 46 +Controls_Left_FakeJoycon_DPad_Left = 47 +Controls_Left_FakeJoycon_DPad_Right = 48 Controls_Left_FakeJoycon_Button_Minus = 52 Controls_Left_FakeJoycon_Button_L = 0 Controls_Left_FakeJoycon_Button_ZL = 0 diff --git a/Ryujinx/Ui/GLScreen.cs b/Ryujinx/Ui/GLScreen.cs index 2cab73f405..0f03d4a0fa 100644 --- a/Ryujinx/Ui/GLScreen.cs +++ b/Ryujinx/Ui/GLScreen.cs @@ -6,7 +6,6 @@ using Gal; using OpenTK; using OpenTK.Graphics; using OpenTK.Graphics.OpenGL; -using Ryujinx.OsHle; using System; namespace Ryujinx