From 1c44d9f66da86daa84f31c891f316ee373a2d312 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Sun, 18 Feb 2018 01:57:33 -0300
Subject: [PATCH] Fix for some SIMD issues

---
 Ryujinx/Cpu/AOpCodeTable.cs                   |  2 +-
 Ryujinx/Cpu/ATranslator.cs                    |  4 +-
 Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs       | 14 ++---
 Ryujinx/Cpu/Instruction/AInstEmitSimdCvt.cs   | 10 +++
 .../Cpu/Instruction/AInstEmitSimdHelper.cs    | 63 +++++++++++++++----
 Ryujinx/Cpu/Instruction/AInstEmitSimdMove.cs  | 26 +++++++-
 Ryujinx/Cpu/Memory/AMemory.cs                 | 19 ++++++
 Ryujinx/Cpu/Translation/AILBlock.cs           |  4 +-
 Ryujinx/Cpu/Translation/AILOpCodeLoad.cs      |  2 +-
 Ryujinx/Cpu/Translation/AILOpCodeStore.cs     |  2 +-
 Ryujinx/Cpu/Translation/AIoType.cs            |  5 +-
 Ryujinx/Ryujinx.conf                          |  8 +--
 Ryujinx/Ui/GLScreen.cs                        |  1 -
 13 files changed, 124 insertions(+), 36 deletions(-)

diff --git a/Ryujinx/Cpu/AOpCodeTable.cs b/Ryujinx/Cpu/AOpCodeTable.cs
index cfed011e81..a3a8477eb7 100644
--- a/Ryujinx/Cpu/AOpCodeTable.cs
+++ b/Ryujinx/Cpu/AOpCodeTable.cs
@@ -139,8 +139,8 @@ namespace ChocolArm64
             Set("0>101110<<100000100110xxxxxxxxxx", AInstEmit.Cmle_V,        typeof(AOpCodeSimd));
             Set("0>001110<<100000101010xxxxxxxxxx", AInstEmit.Cmlt_V,        typeof(AOpCodeSimd));
             Set("0x00111000100000010110xxxxxxxxxx", AInstEmit.Cnt_V,         typeof(AOpCodeSimd));
-            Set("01011110000xxxxx000001xxxxxxxxxx", AInstEmit.Dup_S,         typeof(AOpCodeSimdIns));
             Set("0x001110000xxxxx000011xxxxxxxxxx", AInstEmit.Dup_Gp,        typeof(AOpCodeSimdIns));
+            Set("01011110000xxxxx000001xxxxxxxxxx", AInstEmit.Dup_S,         typeof(AOpCodeSimdIns));
             Set("0x001110000xxxxx000001xxxxxxxxxx", AInstEmit.Dup_V,         typeof(AOpCodeSimdIns));
             Set("0x101110001xxxxx000111xxxxxxxxxx", AInstEmit.Eor_V,         typeof(AOpCodeSimdReg));
             Set("00011110xx100000110000xxxxxxxxxx", AInstEmit.Fabs_S,        typeof(AOpCodeSimd));
diff --git a/Ryujinx/Cpu/ATranslator.cs b/Ryujinx/Cpu/ATranslator.cs
index ba7f3df6fa..74bb9bb799 100644
--- a/Ryujinx/Cpu/ATranslator.cs
+++ b/Ryujinx/Cpu/ATranslator.cs
@@ -8,10 +8,10 @@ namespace ChocolArm64
 {
     class ATranslator
     {
-        private Dictionary<long, ATranslatedSub> CachedSubs;
-
         public AThread Thread { get; private set; }
 
+        private Dictionary<long, ATranslatedSub> CachedSubs;
+
         private bool KeepRunning;
 
         public ATranslator(AThread Parent)
diff --git a/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs b/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs
index 52b8e47086..be4a8cd98b 100644
--- a/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs
+++ b/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs
@@ -27,11 +27,9 @@ namespace ChocolArm64.Decoder
 
             switch (Scale)
             {
-                case 0: Index >>= 0; break;
-
                 case 1:
                 {
-                    if ((Index & 1) != 0)
+                    if ((Size & 1) != 0)
                     {
                         Inst = AInst.Undefined;
 
@@ -45,23 +43,23 @@ namespace ChocolArm64.Decoder
 
                 case 2:
                 {
-                    if ((Index & 2) != 0 ||
-                       ((Index & 1) != 0 && S != 0))
+                    if ((Size & 2) != 0 ||
+                       ((Size & 1) != 0 && S != 0))
                     {
                         Inst = AInst.Undefined;
 
                         return;
                     }
 
-                    if ((Index & 1) != 0)
+                    if ((Size & 1) != 0)
                     {
                         Index >>= 3;
+
+                        Scale = 3;
                     }
                     else
                     {
                         Index >>= 2;
-
-                        Scale = 3;
                     }
 
                     break;
diff --git a/Ryujinx/Cpu/Instruction/AInstEmitSimdCvt.cs b/Ryujinx/Cpu/Instruction/AInstEmitSimdCvt.cs
index 68f23914f8..fbb0dfda5a 100644
--- a/Ryujinx/Cpu/Instruction/AInstEmitSimdCvt.cs
+++ b/Ryujinx/Cpu/Instruction/AInstEmitSimdCvt.cs
@@ -246,6 +246,11 @@ namespace ChocolArm64.Instruction
                 EmitScalarFcvtu(Context, Op.Size, Op.FBits);
             }
 
+            if (Context.CurrOp.RegisterSize == ARegisterSize.Int32)
+            {
+                Context.Emit(OpCodes.Conv_U8);
+            }
+
             Context.EmitStintzr(Op.Rd);
         }
 
@@ -314,6 +319,11 @@ namespace ChocolArm64.Instruction
                         : nameof(ASoftFallback.SatF64ToU64));
                 }
 
+                if (SizeF == 0)
+                {
+                    Context.Emit(OpCodes.Conv_U8);
+                }
+
                 EmitVectorInsert(Context, Op.Rd, Index, SizeI);
             }
 
diff --git a/Ryujinx/Cpu/Instruction/AInstEmitSimdHelper.cs b/Ryujinx/Cpu/Instruction/AInstEmitSimdHelper.cs
index fe5290237d..e4cdc9c5e0 100644
--- a/Ryujinx/Cpu/Instruction/AInstEmitSimdHelper.cs
+++ b/Ryujinx/Cpu/Instruction/AInstEmitSimdHelper.cs
@@ -210,17 +210,17 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdRegElem Op = (AOpCodeSimdRegElem)Context.CurrOp;
 
-            EmitVectorOpF(Context, Emit, OperFlags.RnRm, Op.Index);
+            EmitVectorOpByElemF(Context, Emit, Op.Index);
         }
 
         public static void EmitVectorTernaryOpByElemF(AILEmitterCtx Context, Action Emit)
         {
             AOpCodeSimdRegElem Op = (AOpCodeSimdRegElem)Context.CurrOp;
 
-            EmitVectorOpF(Context, Emit, OperFlags.RdRnRm, Op.Index);
+            EmitVectorOpByElemF(Context, Emit, Op.Index, Ternary: true);
         }
 
-        public static void EmitVectorOpF(AILEmitterCtx Context, Action Emit, OperFlags Opers, int Elem = -1)
+        public static void EmitVectorOpF(AILEmitterCtx Context, Action Emit, OperFlags Opers)
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
@@ -242,14 +242,7 @@ namespace ChocolArm64.Instruction
 
                 if (Opers.HasFlag(OperFlags.Rm))
                 {
-                    if (Elem != -1)
-                    {
-                        EmitVectorExtractF(Context, Op.Rm, Elem, SizeF);
-                    }
-                    else
-                    {
-                        EmitVectorExtractF(Context, Op.Rm, Index, SizeF);
-                    }
+                    EmitVectorExtractF(Context, Op.Rm, Index, SizeF);
                 }
 
                 Emit();
@@ -263,6 +256,50 @@ namespace ChocolArm64.Instruction
             }
         }
 
+        /// <summary>
+        /// Emits a floating-point vector operation that combines each element of Rn with
+        /// element <paramref name="Elem"/> of Rm and writes the results to Rd.
+        /// </summary>
+        /// <param name="Context">Emitter context of the instruction being translated.</param>
+        /// <param name="Emit">Callback invoked once per lane, after the operands have been pushed.</param>
+        /// <param name="Elem">Index of the Rm element used for every lane.</param>
+        /// <param name="Ternary">
+        /// When true, the matching Rd element is pushed before the Rn and Rm operands, so
+        /// that three-operand (accumulating) operations find all of their inputs.
+        /// </param>
+        public static void EmitVectorOpByElemF(AILEmitterCtx Context, Action Emit, int Elem, bool Ternary = false)
+        {
+            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
+
+            int SizeF = Op.Size & 1;
+
+            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+
+            for (int Index = 0; Index < (Bytes >> (SizeF + 2)); Index++)
+            {
+                if (Ternary)
+                {
+                    EmitVectorExtractF(Context, Op.Rd, Index, SizeF);
+                }
+
+                EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
+                EmitVectorExtractF(Context, Op.Rm, Elem,  SizeF);
+
+                Emit();
+
+                // Results go to the temporary vector so Rd is only replaced after every lane was read.
+                EmitVectorInsertTmpF(Context, Index, SizeF);
+            }
+
+            Context.EmitLdvectmp();
+            Context.EmitStvec(Op.Rd);
+
+            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
+        }
+
         public static void EmitVectorUnaryOpSx(AILEmitterCtx Context, Action Emit)
         {
             EmitVectorOp(Context, Emit, OperFlags.Rn, true);
@@ -534,5 +554,31 @@ namespace ChocolArm64.Instruction
 
             Context.EmitStvec(Reg);
         }
+
+        /// <summary>
+        /// Emits a call to the VectorInsertSingle or VectorInsertDouble fallback with the
+        /// temporary vector and <paramref name="Index"/>, then emits a store to the
+        /// temporary vector. NOTE(review): the value to insert is presumably already on
+        /// the evaluation stack when this is called - confirm against call sites.
+        /// </summary>
+        /// <param name="Context">Emitter context receiving the generated code.</param>
+        /// <param name="Index">Destination element index passed to the fallback.</param>
+        /// <param name="Size">0 selects the single-precision insert, 1 the double-precision one.</param>
+        /// <exception cref="ArgumentOutOfRangeException">Size is neither 0 nor 1.</exception>
+        public static void EmitVectorInsertTmpF(AILEmitterCtx Context, int Index, int Size)
+        {
+            Context.EmitLdvectmp();
+            Context.EmitLdc_I4(Index);
+
+            switch (Size)
+            {
+                case 0: ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle)); break;
+                case 1: ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble)); break;
+
+                default: throw new ArgumentOutOfRangeException(nameof(Size));
+            }
+
+            Context.EmitStvectmp();
+        }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx/Cpu/Instruction/AInstEmitSimdMove.cs b/Ryujinx/Cpu/Instruction/AInstEmitSimdMove.cs
index 87d57e962b..c8f690328f 100644
--- a/Ryujinx/Cpu/Instruction/AInstEmitSimdMove.cs
+++ b/Ryujinx/Cpu/Instruction/AInstEmitSimdMove.cs
@@ -85,6 +85,8 @@ namespace ChocolArm64.Instruction
 
             EmitVectorExtractZx(Context, Op.Rn, 0, 3);
 
+            EmitIntZeroHigherIfNeeded(Context);
+
             Context.EmitStintzr(Op.Rd);
         }
 
@@ -94,6 +96,8 @@ namespace ChocolArm64.Instruction
 
             EmitVectorExtractZx(Context, Op.Rn, 1, 3);
 
+            EmitIntZeroHigherIfNeeded(Context);
+
             Context.EmitStintzr(Op.Rd);
         }
 
@@ -103,6 +107,8 @@ namespace ChocolArm64.Instruction
 
             Context.EmitLdintzr(Op.Rn);
 
+            EmitIntZeroHigherIfNeeded(Context);
+
             EmitScalarSet(Context, Op.Rd, 3);
         }
 
@@ -112,6 +118,8 @@ namespace ChocolArm64.Instruction
 
             Context.EmitLdintzr(Op.Rn);
 
+            EmitIntZeroHigherIfNeeded(Context);
+
             EmitVectorInsert(Context, Op.Rd, 1, 3);
         }
 
@@ -137,12 +145,19 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp;
 
-            for (int Index = 0; Index < (4 >> Op.Size); Index++)
+            int Elems = Op.RegisterSize == ARegisterSize.SIMD128 ? 4 : 2;
+
+            for (int Index = 0; Index < (Elems >> Op.Size); Index++)
             {
                 Context.EmitLdc_I8(Op.Imm);
 
                 EmitVectorInsert(Context, Op.Rd, Index, Op.Size + 2);
             }
+
+            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
         }
 
         public static void Ins_Gp(AILEmitterCtx Context)
@@ -248,6 +263,25 @@ namespace ChocolArm64.Instruction
             }
         }
 
+        /// <summary>
+        /// For 32-bit integer register operations, emits a conversion to an unsigned
+        /// 32-bit value followed by a conversion to an unsigned 64-bit value.
+        /// Emits nothing for any other register size.
+        /// </summary>
+        /// <param name="Context">Emitter context of the instruction being translated.</param>
+        private static void EmitIntZeroHigherIfNeeded(AILEmitterCtx Context)
+        {
+            bool IsInt32 = Context.CurrOp.RegisterSize == ARegisterSize.Int32;
+
+            if (!IsInt32)
+            {
+                return;
+            }
+
+            Context.Emit(OpCodes.Conv_U4);
+            Context.Emit(OpCodes.Conv_U8);
+        }
+
         private static void EmitVectorUnzip(AILEmitterCtx Context, int Part)
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
diff --git a/Ryujinx/Cpu/Memory/AMemory.cs b/Ryujinx/Cpu/Memory/AMemory.cs
index 52f76e2ec5..158765b87e 100644
--- a/Ryujinx/Cpu/Memory/AMemory.cs
+++ b/Ryujinx/Cpu/Memory/AMemory.cs
@@ -2,6 +2,7 @@ using ChocolArm64.Exceptions;
 using ChocolArm64.State;
 using System;
 using System.Collections.Generic;
+using System.Runtime.CompilerServices;
 
 namespace ChocolArm64.Memory
 {
@@ -138,6 +139,7 @@ namespace ChocolArm64.Memory
         public int   ReadInt32(long Position) =>   (int)ReadUInt32(Position);
         public long  ReadInt64(long Position) =>  (long)ReadUInt64(Position);
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public byte ReadByte(long Position)
         {
 #if DEBUG
@@ -147,6 +149,7 @@ namespace ChocolArm64.Memory
             return *((byte*)(RamPtr + (uint)Position));
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public ushort ReadUInt16(long Position)
         {
 #if DEBUG
@@ -156,6 +159,7 @@ namespace ChocolArm64.Memory
             return *((ushort*)(RamPtr + (uint)Position));
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public uint ReadUInt32(long Position)
         {
 #if DEBUG
@@ -165,6 +169,7 @@ namespace ChocolArm64.Memory
             return *((uint*)(RamPtr + (uint)Position));
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public ulong ReadUInt64(long Position)
         {
 #if DEBUG
@@ -174,6 +179,7 @@ namespace ChocolArm64.Memory
             return *((ulong*)(RamPtr + (uint)Position));
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public AVec ReadVector8(long Position)
         {
 #if DEBUG
@@ -183,6 +189,7 @@ namespace ChocolArm64.Memory
             return new AVec() { B0 = ReadByte(Position) };
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public AVec ReadVector16(long Position)
         {
 #if DEBUG
@@ -192,6 +199,7 @@ namespace ChocolArm64.Memory
             return new AVec() { H0 = ReadUInt16(Position) };
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public AVec ReadVector32(long Position)
         {
 #if DEBUG
@@ -201,6 +209,7 @@ namespace ChocolArm64.Memory
             return new AVec() { W0 = ReadUInt32(Position) };
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public AVec ReadVector64(long Position)
         {
 #if DEBUG
@@ -210,6 +219,7 @@ namespace ChocolArm64.Memory
             return new AVec() { X0 = ReadUInt64(Position) };
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public AVec ReadVector128(long Position)
         {
 #if DEBUG
@@ -228,6 +238,7 @@ namespace ChocolArm64.Memory
         public void WriteInt32(long Position, int   Value) => WriteUInt32(Position,   (uint)Value);
         public void WriteInt64(long Position, long  Value) => WriteUInt64(Position,  (ulong)Value);
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void WriteByte(long Position, byte Value)
         {
 #if DEBUG
@@ -237,6 +248,7 @@ namespace ChocolArm64.Memory
             *((byte*)(RamPtr + (uint)Position)) = Value;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void WriteUInt16(long Position, ushort Value)
         {
 #if DEBUG
@@ -246,6 +258,7 @@ namespace ChocolArm64.Memory
             *((ushort*)(RamPtr + (uint)Position)) = Value;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void WriteUInt32(long Position, uint Value)
         {
 #if DEBUG
@@ -255,6 +268,7 @@ namespace ChocolArm64.Memory
             *((uint*)(RamPtr + (uint)Position)) = Value;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void WriteUInt64(long Position, ulong Value)
         {
 #if DEBUG
@@ -264,6 +278,7 @@ namespace ChocolArm64.Memory
             *((ulong*)(RamPtr + (uint)Position)) = Value;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void WriteVector8(long Position, AVec Value)
         {
 #if DEBUG
@@ -273,6 +288,7 @@ namespace ChocolArm64.Memory
             WriteByte(Position, Value.B0);
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void WriteVector16(long Position, AVec Value)
         {
 #if DEBUG
@@ -282,6 +298,7 @@ namespace ChocolArm64.Memory
             WriteUInt16(Position, Value.H0);
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void WriteVector32(long Position, AVec Value)
         {
 #if DEBUG
@@ -291,6 +308,7 @@ namespace ChocolArm64.Memory
             WriteUInt32(Position, Value.W0);
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void WriteVector64(long Position, AVec Value)
         {
 #if DEBUG
@@ -300,6 +318,7 @@ namespace ChocolArm64.Memory
             WriteUInt64(Position, Value.X0);
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void WriteVector128(long Position, AVec Value)
         {
 #if DEBUG
diff --git a/Ryujinx/Cpu/Translation/AILBlock.cs b/Ryujinx/Cpu/Translation/AILBlock.cs
index 2746e4288d..bed195aaf0 100644
--- a/Ryujinx/Cpu/Translation/AILBlock.cs
+++ b/Ryujinx/Cpu/Translation/AILBlock.cs
@@ -26,7 +26,7 @@ namespace ChocolArm64.Translation
         {
             if (ILEmitter is AILOpCodeLoad Ld && AILEmitter.IsRegIndex(Ld.Index))
             {
-                switch (Ld.IoType & AIoType.Mask)
+                switch (Ld.IoType)
                 {
                     case AIoType.Flag:   IntInputs |= ((1L << Ld.Index) << 32) & ~IntOutputs; break;
                     case AIoType.Int:    IntInputs |=  (1L << Ld.Index)        & ~IntOutputs; break;
@@ -37,7 +37,7 @@ namespace ChocolArm64.Translation
             {
                 if (AILEmitter.IsRegIndex(St.Index))
                 {
-                    switch (St.IoType & AIoType.Mask)
+                    switch (St.IoType)
                     {
                         case AIoType.Flag:   IntOutputs |= (1L << St.Index) << 32; break;
                         case AIoType.Int:    IntOutputs |=  1L << St.Index;        break;
diff --git a/Ryujinx/Cpu/Translation/AILOpCodeLoad.cs b/Ryujinx/Cpu/Translation/AILOpCodeLoad.cs
index 23ea0d07bf..32da2cdb8b 100644
--- a/Ryujinx/Cpu/Translation/AILOpCodeLoad.cs
+++ b/Ryujinx/Cpu/Translation/AILOpCodeLoad.cs
@@ -22,7 +22,7 @@ namespace ChocolArm64.Translation
 
         public void Emit(AILEmitter Context)
         {
-            switch (IoType & AIoType.Mask)
+            switch (IoType)
             {
                 case AIoType.Arg: Context.Generator.EmitLdarg(Index); break;
 
diff --git a/Ryujinx/Cpu/Translation/AILOpCodeStore.cs b/Ryujinx/Cpu/Translation/AILOpCodeStore.cs
index 87d3f85a94..d14dce5bfe 100644
--- a/Ryujinx/Cpu/Translation/AILOpCodeStore.cs
+++ b/Ryujinx/Cpu/Translation/AILOpCodeStore.cs
@@ -22,7 +22,7 @@ namespace ChocolArm64.Translation
 
         public void Emit(AILEmitter Context)
         {
-            switch (IoType & AIoType.Mask)
+            switch (IoType)
             {
                 case AIoType.Arg: Context.Generator.EmitStarg(Index); break;
 
diff --git a/Ryujinx/Cpu/Translation/AIoType.cs b/Ryujinx/Cpu/Translation/AIoType.cs
index 34aa224e5f..94f8908142 100644
--- a/Ryujinx/Cpu/Translation/AIoType.cs
+++ b/Ryujinx/Cpu/Translation/AIoType.cs
@@ -10,9 +10,6 @@ namespace ChocolArm64.Translation
         Flag,
         Int,
         Float,
-        Vector,
-        Mask    = 0xff,
-        VectorI = Vector | 1 << 8,
-        VectorF = Vector | 1 << 9
+        Vector
     }
 }
\ No newline at end of file
diff --git a/Ryujinx/Ryujinx.conf b/Ryujinx/Ryujinx.conf
index 0c6727f6bb..8f791df311 100644
--- a/Ryujinx/Ryujinx.conf
+++ b/Ryujinx/Ryujinx.conf
@@ -25,10 +25,10 @@ Controls_Left_FakeJoycon_Stick_Down = 93
 Controls_Left_FakeJoycon_Stick_Left = 92
 Controls_Left_FakeJoycon_Stick_Right = 94
 Controls_Left_FakeJoycon_Stick_Button = 0
-Controls_Left_FakeJoycon_DPad_Up = 0
-Controls_Left_FakeJoycon_DPad_Down = 0
-Controls_Left_FakeJoycon_DPad_Left = 0
-Controls_Left_FakeJoycon_DPad_Right = 0
+Controls_Left_FakeJoycon_DPad_Up = 45
+Controls_Left_FakeJoycon_DPad_Down = 46
+Controls_Left_FakeJoycon_DPad_Left = 47
+Controls_Left_FakeJoycon_DPad_Right = 48
 Controls_Left_FakeJoycon_Button_Minus = 52
 Controls_Left_FakeJoycon_Button_L = 0
 Controls_Left_FakeJoycon_Button_ZL = 0
diff --git a/Ryujinx/Ui/GLScreen.cs b/Ryujinx/Ui/GLScreen.cs
index 2cab73f405..0f03d4a0fa 100644
--- a/Ryujinx/Ui/GLScreen.cs
+++ b/Ryujinx/Ui/GLScreen.cs
@@ -6,7 +6,6 @@ using Gal;
 using OpenTK;
 using OpenTK.Graphics;
 using OpenTK.Graphics.OpenGL;
-using Ryujinx.OsHle;
 using System;
 
 namespace Ryujinx