Archived
1
0
Fork 0
forked from Mirror/Ryujinx

Fix increment on Arm32 NEON VLDn/VSTn instructions with regs > 1 (#3695)

* Fix increment on Arm32 NEON VLDn/VSTn instructions with regs > 1

* PPTC version bump

* PR feedback
This commit is contained in:
gdkchan 2022-09-13 03:24:09 -03:00 committed by GitHub
parent 2492e7e808
commit 729ff5337c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 71 additions and 39 deletions

View file

@ -1,11 +1,10 @@
using ARMeilleure.State;
using System;
namespace ARMeilleure.Decoders
{
class OpCode32SimdMemPair : OpCode32, IOpCode32Simd
{
private static int[] RegsMap =
private static int[] _regsMap =
{
1, 1, 4, 2,
1, 1, 3, 1,
@ -40,9 +39,9 @@ namespace ARMeilleure.Decoders
WBack = Rm != RegisterAlias.Aarch32Pc;
RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp;
Regs = RegsMap[(opCode >> 8) & 0xf];
Regs = _regsMap[(opCode >> 8) & 0xf];
Increment = Math.Min(Regs, ((opCode >> 8) & 0x1) + 1);
Increment = ((opCode >> 8) & 0x1) + 1;
}
}
}

View file

@ -888,18 +888,31 @@ namespace ARMeilleure.Decoders
SetA32("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create);
SetA32("111100100x10xxxxxxxx1100xxx1xxxx", InstName.Vfms, InstEmit32.Vfms_V, OpCode32SimdReg.Create);
SetA32("1111001x0x<<xxxxxxxx0000xxx0xxxx", InstName.Vhadd, InstEmit32.Vhadd, OpCode32SimdReg.Create);
SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
SetA32("111101000x10xxxxxxxx0111xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 1.
SetA32("111101000x10xxxxxxxx1010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 2.
SetA32("111101000x10xxxxxxxx0110xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 3.
SetA32("111101001x10xxxxxxxx0000xxx0xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx0100xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx1000x000xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx1000x011xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx110000x0xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx110001xxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx110010xxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
SetA32("111101000x10xxxxxxxx0111xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 1.
SetA32("111101000x10xxxxxxxx1010xx<<xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 2.
SetA32("111101000x10xxxxxxxx0110xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 3.
SetA32("111101000x10xxxxxxxx0010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create); // Regs = 4.
SetA32("111101001x10xxxxxxxxxx01xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create);
SetA32("111101000x10xxxxxxxx100xxxxxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
SetA32("111101000x10xxxxxxxx0011xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create); // Regs = 2, inc = 2.
SetA32("111101001x10xxxxxxxxxx10xxxxxxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create);
SetA32("111101000x10xxxxxxxx010xxxxxxxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
SetA32("111101001x10xxxxxxxxxx11xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create);
SetA32("111101000x10xxxxxxxx000xxxxxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
SetA32("111101001x10xxxxxxxx0x01xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx1001xx0xxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx1101<<xxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create);
SetA32("111101000x10xxxxxxxx100x<<0xxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
SetA32("111101000x10xxxxxxxx100x<<10xxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
SetA32("111101000x10xxxxxxxx0011<<xxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create); // Regs = 2, inc = 2.
SetA32("111101001x10xxxxxxxx0x10xxx0xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx1010xx00xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx1110<<x0xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create);
SetA32("111101000x10xxxxxxxx010x<<0xxxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
SetA32("111101001x10xxxxxxxx0x11xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx1011xx<<xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create);
SetA32("111101001x10xxxxxxxx1111<<x>xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create);
SetA32("111101000x10xxxxxxxx000x<<xxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
SetA32("1111001x0x<<xxxxxxxx0110xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_I, OpCode32SimdReg.Create);
SetA32("111100100x00xxxxxxxx1111xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_V, OpCode32SimdReg.Create);
SetA32("1111001x0x<<xxxxxxxx0110xxx1xxxx", InstName.Vmin, InstEmit32.Vmin_I, OpCode32SimdReg.Create);
@ -974,18 +987,25 @@ namespace ARMeilleure.Decoders
SetA32("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create);
SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create);
SetA32("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra, InstEmit32.Vsra, OpCode32SimdShImm.Create);
SetA32("111101001x00xxxxxxxx<<00xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create);
SetA32("111101000x00xxxxxxxx0111xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 1.
SetA32("111101000x00xxxxxxxx1010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 2.
SetA32("111101000x00xxxxxxxx0110xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 3.
SetA32("111101001x00xxxxxxxx0000xxx0xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create);
SetA32("111101001x00xxxxxxxx0100xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create);
SetA32("111101001x00xxxxxxxx1000x000xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create);
SetA32("111101001x00xxxxxxxx1000x011xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create);
SetA32("111101000x00xxxxxxxx0111xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 1.
SetA32("111101000x00xxxxxxxx1010xx<<xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 2.
SetA32("111101000x00xxxxxxxx0110xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 3.
SetA32("111101000x00xxxxxxxx0010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 4.
SetA32("111101001x00xxxxxxxx<<01xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemSingle.Create);
SetA32("111101000x00xxxxxxxx100xxxxxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
SetA32("111101000x00xxxxxxxx0011xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create); // Regs = 2, inc = 2.
SetA32("111101001x00xxxxxxxx<<10xxxxxxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemSingle.Create);
SetA32("111101000x00xxxxxxxx010xxxxxxxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
SetA32("111101001x00xxxxxxxx<<11xxxxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemSingle.Create);
SetA32("111101000x00xxxxxxxx000xxxxxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
SetA32("111101001x00xxxxxxxx0x01xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemSingle.Create);
SetA32("111101001x00xxxxxxxx1001xx0xxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemSingle.Create);
SetA32("111101000x00xxxxxxxx100x<<0xxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
SetA32("111101000x00xxxxxxxx100x<<10xxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create); // Regs = 1, inc = 1/2 (itype).
SetA32("111101000x00xxxxxxxx0011<<xxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create); // Regs = 2, inc = 2.
SetA32("111101001x00xxxxxxxx0x10xxx0xxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemSingle.Create);
SetA32("111101001x00xxxxxxxx1010xx00xxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemSingle.Create);
SetA32("111101000x00xxxxxxxx010x<<0xxxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
SetA32("111101001x00xxxxxxxx0x11xxxxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemSingle.Create);
SetA32("111101001x00xxxxxxxx1011xx<<xxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemSingle.Create);
SetA32("111101000x00xxxxxxxx000x<<xxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemPair.Create); // Inc = 1/2 (itype).
SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, OpCode32SimdReg.Create);
SetA32("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, OpCode32SimdReg.Create);
SetA32("1111001x1x<<xxxxxxx00010x0x0xxxx", InstName.Vsubl, InstEmit32.Vsubl_I, OpCode32SimdRegLong.Create);

View file

@ -67,7 +67,7 @@ namespace ARMeilleure.Instructions
for (int i = 0; i < count; i++)
{
// Write an element from a double simd register.
// Accesses an element from a double simd register.
Operand address = context.Add(n, Const(offset));
if (eBytes == 8)
{
@ -131,6 +131,7 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdMemPair op = (OpCode32SimdMemPair)context.CurrOp;
int increment = count > 1 ? op.Increment : 1;
int eBytes = 1 << op.Size;
Operand n = context.Copy(GetIntA32(context, op.Rn));
@ -144,7 +145,7 @@ namespace ARMeilleure.Instructions
int elemD = d + reg;
for (int i = 0; i < count; i++)
{
// Write an element from a double simd register
// Accesses an element from a double simd register,
// add ebytes for each element.
Operand address = context.Add(n, Const(offset));
int index = ((elemD & 1) << (3 - op.Size)) + elem;
@ -161,7 +162,6 @@ namespace ARMeilleure.Instructions
}
else
{
if (load)
{
EmitLoadSimd(context, address, GetVecA32(elemD >> 1), elemD >> 1, index, op.Size);
@ -173,7 +173,7 @@ namespace ARMeilleure.Instructions
}
offset += eBytes;
elemD += op.Increment;
elemD += increment;
}
}
}

View file

@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
private const uint InternalVersion = 3683; //! To be incremented manually for each change to the ARMeilleure project.
private const uint InternalVersion = 3695; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";

View file

@ -12,7 +12,7 @@ namespace Ryujinx.Tests.Cpu
#if SimdMemory32
private const int RndCntImm = 2;
private uint[] LDSTModes =
private uint[] _ldStModes =
{
// LD1
0b0111,
@ -96,7 +96,7 @@ namespace Ryujinx.Tests.Cpu
[Values(0u, 13u)] uint rn,
[Values(1u, 13u, 15u)] uint rm,
[Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd,
[Range(0u, 3u)] uint mode,
[Range(0u, 10u)] uint mode,
[Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset)
{
var data = GenerateVectorSequence(0x1000);
@ -104,7 +104,13 @@ namespace Ryujinx.Tests.Cpu
uint opcode = 0xf4200000u; // VLD4.8 {D0, D1, D2, D3}, [R0], R0
opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8);
if (mode > 3 && size == 3)
{
// A size of 3 is only valid for VLD1.
size = 2;
}
opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (_ldStModes[mode] << 8);
opcode |= ((vd & 0x10) << 18);
opcode |= ((vd & 0xf) << 12);
@ -151,7 +157,7 @@ namespace Ryujinx.Tests.Cpu
[Values(0u, 13u)] uint rn,
[Values(1u, 13u, 15u)] uint rm,
[Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd,
[Range(0u, 3u)] uint mode,
[Range(0u, 10u)] uint mode,
[Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset)
{
var data = GenerateVectorSequence(0x1000);
@ -161,7 +167,13 @@ namespace Ryujinx.Tests.Cpu
uint opcode = 0xf4000000u; // VST4.8 {D0, D1, D2, D3}, [R0], R0
opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8);
if (mode > 3 && size == 3)
{
// A size of 3 is only valid for VST1.
size = 2;
}
opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (_ldStModes[mode] << 8);
opcode |= ((vd & 0x10) << 18);
opcode |= ((vd & 0xf) << 12);
@ -183,7 +195,8 @@ namespace Ryujinx.Tests.Cpu
uint opcode = 0xec100a00u; // VST4.8 {D0, D1, D2, D3}, [R0], R0
uint[] vldmModes = {
uint[] vldmModes =
{
// Note: 3rd 0 leaves a space for "D".
0b0100, // Increment after.
0b0101, // Increment after. (!)