forked from Mirror/Ryujinx
Fix increment on Arm32 NEON VLDn/VSTn instructions with regs > 1 (#3695)
* Fix increment on Arm32 NEON VLDn/VSTn instructions with regs > 1 * PPTC version bump * PR feedback
This commit is contained in:
parent
2492e7e808
commit
729ff5337c
5 changed files with 71 additions and 39 deletions
|
@ -1,11 +1,10 @@
|
|||
using ARMeilleure.State;
|
||||
using System;
|
||||
|
||||
namespace ARMeilleure.Decoders
|
||||
{
|
||||
class OpCode32SimdMemPair : OpCode32, IOpCode32Simd
|
||||
{
|
||||
private static int[] RegsMap =
|
||||
private static int[] _regsMap =
|
||||
{
|
||||
1, 1, 4, 2,
|
||||
1, 1, 3, 1,
|
||||
|
@ -40,9 +39,9 @@ namespace ARMeilleure.Decoders
|
|||
WBack = Rm != RegisterAlias.Aarch32Pc;
|
||||
RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp;
|
||||
|
||||
Regs = RegsMap[(opCode >> 8) & 0xf];
|
||||
Regs = _regsMap[(opCode >> 8) & 0xf];
|
||||
|
||||
Increment = Math.Min(Regs, ((opCode >> 8) & 0x1) + 1);
|
||||
Increment = ((opCode >> 8) & 0x1) + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -888,18 +888,31 @@ namespace ARMeilleure.Decoders
|
|||
SetA32("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create);
|
||||
SetA32("111100100x10xxxxxxxx1100xxx1xxxx", InstName.Vfms, InstEmit32.Vfms_V, OpCode32SimdReg.Create);
|
||||
SetA32("1111001x0x<<xxxxxxxx0000xxx0xxxx", InstName.Vhadd, InstEmit32.Vhadd, OpCode32SimdReg.Create);
|
||||
SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x10xxxxxxxx0111xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 1.
|
||||
SetA32("111101000x10xxxxxxxx1010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 2.
|
||||
SetA32("111101000x10xxxxxxxx0110xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 3.
|
||||
SetA32("111101001x10xxxxxxxx0000xxx0xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx0100xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx1000x000xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx1000x011xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx110000x0xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx110001xxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx110010xxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x10xxxxxxxx0111xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 1.
|
||||
SetA32("111101000x10xxxxxxxx1010xx<<xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 2.
|
||||
SetA32("111101000x10xxxxxxxx0110xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 3.
|
||||
SetA32("111101000x10xxxxxxxx0010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 4.
|
||||
SetA32("111101001x10xxxxxxxxxx01xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x10xxxxxxxx100xxxxxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
|
||||
SetA32("111101000x10xxxxxxxx0011xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create); // Regs = 2, inc = 2.
|
||||
SetA32("111101001x10xxxxxxxxxx10xxxxxxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x10xxxxxxxx010xxxxxxxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
|
||||
SetA32("111101001x10xxxxxxxxxx11xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x10xxxxxxxx000xxxxxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
|
||||
SetA32("111101001x10xxxxxxxx0x01xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx1001xx0xxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx1101<<xxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x10xxxxxxxx100x<<0xxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
|
||||
SetA32("111101000x10xxxxxxxx100x<<10xxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
|
||||
SetA32("111101000x10xxxxxxxx0011<<xxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create); // Regs = 2, inc = 2.
|
||||
SetA32("111101001x10xxxxxxxx0x10xxx0xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx1010xx00xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx1110<<x0xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x10xxxxxxxx010x<<0xxxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
|
||||
SetA32("111101001x10xxxxxxxx0x11xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx1011xx<<xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x10xxxxxxxx1111<<x>xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x10xxxxxxxx000x<<xxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
|
||||
SetA32("1111001x0x<<xxxxxxxx0110xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_I, OpCode32SimdReg.Create);
|
||||
SetA32("111100100x00xxxxxxxx1111xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_V, OpCode32SimdReg.Create);
|
||||
SetA32("1111001x0x<<xxxxxxxx0110xxx1xxxx", InstName.Vmin, InstEmit32.Vmin_I, OpCode32SimdReg.Create);
|
||||
|
@ -974,18 +987,25 @@ namespace ARMeilleure.Decoders
|
|||
SetA32("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create);
|
||||
SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create);
|
||||
SetA32("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra, InstEmit32.Vsra, OpCode32SimdShImm.Create);
|
||||
SetA32("111101001x00xxxxxxxx<<00xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x00xxxxxxxx0111xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 1.
|
||||
SetA32("111101000x00xxxxxxxx1010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 2.
|
||||
SetA32("111101000x00xxxxxxxx0110xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 3.
|
||||
SetA32("111101001x00xxxxxxxx0000xxx0xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x00xxxxxxxx0100xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x00xxxxxxxx1000x000xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x00xxxxxxxx1000x011xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x00xxxxxxxx0111xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 1.
|
||||
SetA32("111101000x00xxxxxxxx1010xx<<xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 2.
|
||||
SetA32("111101000x00xxxxxxxx0110xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 3.
|
||||
SetA32("111101000x00xxxxxxxx0010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 4.
|
||||
SetA32("111101001x00xxxxxxxx<<01xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x00xxxxxxxx100xxxxxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
|
||||
SetA32("111101000x00xxxxxxxx0011xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create); // Regs = 2, inc = 2.
|
||||
SetA32("111101001x00xxxxxxxx<<10xxxxxxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x00xxxxxxxx010xxxxxxxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
|
||||
SetA32("111101001x00xxxxxxxx<<11xxxxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x00xxxxxxxx000xxxxxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
|
||||
SetA32("111101001x00xxxxxxxx0x01xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x00xxxxxxxx1001xx0xxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x00xxxxxxxx100x<<0xxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
|
||||
SetA32("111101000x00xxxxxxxx100x<<10xxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
|
||||
SetA32("111101000x00xxxxxxxx0011<<xxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create); // Regs = 2, inc = 2.
|
||||
SetA32("111101001x00xxxxxxxx0x10xxx0xxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x00xxxxxxxx1010xx00xxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x00xxxxxxxx010x<<0xxxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
|
||||
SetA32("111101001x00xxxxxxxx0x11xxxxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101001x00xxxxxxxx1011xx<<xxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemSingle.Create);
|
||||
SetA32("111101000x00xxxxxxxx000x<<xxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
|
||||
SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, OpCode32SimdReg.Create);
|
||||
SetA32("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, OpCode32SimdReg.Create);
|
||||
SetA32("1111001x1x<<xxxxxxx00010x0x0xxxx", InstName.Vsubl, InstEmit32.Vsubl_I, OpCode32SimdRegLong.Create);
|
||||
|
|
|
@ -67,7 +67,7 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
// Write an element from a double simd register.
|
||||
// Accesses an element from a double simd register.
|
||||
Operand address = context.Add(n, Const(offset));
|
||||
if (eBytes == 8)
|
||||
{
|
||||
|
@ -131,6 +131,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
OpCode32SimdMemPair op = (OpCode32SimdMemPair)context.CurrOp;
|
||||
|
||||
int increment = count > 1 ? op.Increment : 1;
|
||||
int eBytes = 1 << op.Size;
|
||||
|
||||
Operand n = context.Copy(GetIntA32(context, op.Rn));
|
||||
|
@ -144,7 +145,7 @@ namespace ARMeilleure.Instructions
|
|||
int elemD = d + reg;
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
// Write an element from a double simd register
|
||||
// Accesses an element from a double simd register,
|
||||
// add ebytes for each element.
|
||||
Operand address = context.Add(n, Const(offset));
|
||||
int index = ((elemD & 1) << (3 - op.Size)) + elem;
|
||||
|
@ -161,7 +162,6 @@ namespace ARMeilleure.Instructions
|
|||
}
|
||||
else
|
||||
{
|
||||
|
||||
if (load)
|
||||
{
|
||||
EmitLoadSimd(context, address, GetVecA32(elemD >> 1), elemD >> 1, index, op.Size);
|
||||
|
@ -173,7 +173,7 @@ namespace ARMeilleure.Instructions
|
|||
}
|
||||
|
||||
offset += eBytes;
|
||||
elemD += op.Increment;
|
||||
elemD += increment;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
|
|||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||
|
||||
private const uint InternalVersion = 3683; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
private const uint InternalVersion = 3695; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
|
||||
private const string ActualDir = "0";
|
||||
private const string BackupDir = "1";
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace Ryujinx.Tests.Cpu
|
|||
#if SimdMemory32
|
||||
private const int RndCntImm = 2;
|
||||
|
||||
private uint[] LDSTModes =
|
||||
private uint[] _ldStModes =
|
||||
{
|
||||
// LD1
|
||||
0b0111,
|
||||
|
@ -96,7 +96,7 @@ namespace Ryujinx.Tests.Cpu
|
|||
[Values(0u, 13u)] uint rn,
|
||||
[Values(1u, 13u, 15u)] uint rm,
|
||||
[Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd,
|
||||
[Range(0u, 3u)] uint mode,
|
||||
[Range(0u, 10u)] uint mode,
|
||||
[Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset)
|
||||
{
|
||||
var data = GenerateVectorSequence(0x1000);
|
||||
|
@ -104,7 +104,13 @@ namespace Ryujinx.Tests.Cpu
|
|||
|
||||
uint opcode = 0xf4200000u; // VLD4.8 {D0, D1, D2, D3}, [R0], R0
|
||||
|
||||
opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8);
|
||||
if (mode > 3 && size == 3)
|
||||
{
|
||||
// A size of 3 is only valid for VLD1.
|
||||
size = 2;
|
||||
}
|
||||
|
||||
opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (_ldStModes[mode] << 8);
|
||||
|
||||
opcode |= ((vd & 0x10) << 18);
|
||||
opcode |= ((vd & 0xf) << 12);
|
||||
|
@ -151,17 +157,23 @@ namespace Ryujinx.Tests.Cpu
|
|||
[Values(0u, 13u)] uint rn,
|
||||
[Values(1u, 13u, 15u)] uint rm,
|
||||
[Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd,
|
||||
[Range(0u, 3u)] uint mode,
|
||||
[Range(0u, 10u)] uint mode,
|
||||
[Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset)
|
||||
{
|
||||
var data = GenerateVectorSequence(0x1000);
|
||||
SetWorkingMemory(0, data);
|
||||
|
||||
(V128 vec1, V128 vec2, V128 vec3, V128 vec4) = GenerateTestVectors();
|
||||
|
||||
|
||||
uint opcode = 0xf4000000u; // VST4.8 {D0, D1, D2, D3}, [R0], R0
|
||||
|
||||
opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8);
|
||||
if (mode > 3 && size == 3)
|
||||
{
|
||||
// A size of 3 is only valid for VST1.
|
||||
size = 2;
|
||||
}
|
||||
|
||||
opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (_ldStModes[mode] << 8);
|
||||
|
||||
opcode |= ((vd & 0x10) << 18);
|
||||
opcode |= ((vd & 0xf) << 12);
|
||||
|
@ -183,7 +195,8 @@ namespace Ryujinx.Tests.Cpu
|
|||
|
||||
uint opcode = 0xec100a00u; // VST4.8 {D0, D1, D2, D3}, [R0], R0
|
||||
|
||||
uint[] vldmModes = {
|
||||
uint[] vldmModes =
|
||||
{
|
||||
// Note: 3rd 0 leaves a space for "D".
|
||||
0b0100, // Increment after.
|
||||
0b0101, // Increment after. (!)
|
||||
|
@ -240,7 +253,7 @@ namespace Ryujinx.Tests.Cpu
|
|||
{
|
||||
opcode |= ((sd & 0x1) << 22);
|
||||
opcode |= ((sd & 0x1e) << 11);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
opcode |= ((sd & 0x10) << 18);
|
||||
|
|
Reference in a new issue