Use movd,movq for i32/64 VectorExtract %x, 0x0 (#1439)

* Use movd,movq for i32/64 VectorExtract %x, 0x0

* Increment PPTC internal version

* Use else-if instead

- Address gdkchan's feedback.
- Clean up Debug.Assert calls

* Inline `count` expression into Debug.Assert

Apparently the CoreCLR JIT will not eliminate this. :(
This commit is contained in:
Ficture Seven 2020-07-30 09:52:26 +04:00 committed by GitHub
parent 991784868f
commit b3c051bbec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 24 additions and 34 deletions

View file

@@ -1148,62 +1148,52 @@ namespace ARMeilleure.CodeGen.X86
byte index = src2.AsByte(); byte index = src2.AsByte();
Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes());
if (dest.Type == OperandType.I32) if (dest.Type == OperandType.I32)
{ {
Debug.Assert(index < 4); if (index == 0)
{
if (HardwareCapabilities.SupportsSse41) context.Assembler.Movd(dest, src1);
}
else if (HardwareCapabilities.SupportsSse41)
{ {
context.Assembler.Pextrd(dest, src1, index); context.Assembler.Pextrd(dest, src1, index);
} }
else else
{ {
if (index != 0) int mask0 = 0b11_10_01_00;
{ int mask1 = 0b11_10_01_00;
int mask0 = 0b11_10_01_00;
int mask1 = 0b11_10_01_00;
mask0 = BitUtils.RotateRight(mask0, index * 2, 8); mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8); mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
context.Assembler.Pshufd(src1, src1, (byte)mask0); context.Assembler.Pshufd(src1, src1, (byte)mask0);
context.Assembler.Movd (dest, src1); context.Assembler.Movd (dest, src1);
context.Assembler.Pshufd(src1, src1, (byte)mask1); context.Assembler.Pshufd(src1, src1, (byte)mask1);
}
else
{
context.Assembler.Movd(dest, src1);
}
} }
} }
else if (dest.Type == OperandType.I64) else if (dest.Type == OperandType.I64)
{ {
Debug.Assert(index < 2); if (index == 0)
{
if (HardwareCapabilities.SupportsSse41) context.Assembler.Movq(dest, src1);
}
else if (HardwareCapabilities.SupportsSse41)
{ {
context.Assembler.Pextrq(dest, src1, index); context.Assembler.Pextrq(dest, src1, index);
} }
else else
{ {
if (index != 0) const byte mask = 0b01_00_11_10;
{
const byte mask = 0b01_00_11_10;
context.Assembler.Pshufd(src1, src1, mask); context.Assembler.Pshufd(src1, src1, mask);
context.Assembler.Movq (dest, src1); context.Assembler.Movq (dest, src1);
context.Assembler.Pshufd(src1, src1, mask); context.Assembler.Pshufd(src1, src1, mask);
}
else
{
context.Assembler.Movq(dest, src1);
}
} }
} }
else else
{ {
Debug.Assert(index < (dest.Type == OperandType.FP32 ? 4 : 2));
// Floating-point types. // Floating-point types.
if ((index >= 2 && dest.Type == OperandType.FP32) || if ((index >= 2 && dest.Type == OperandType.FP32) ||
(index == 1 && dest.Type == OperandType.FP64)) (index == 1 && dest.Type == OperandType.FP64))

View file

@@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.PTC
{ {
private const string HeaderMagic = "PTChd"; private const string HeaderMagic = "PTChd";
private const int InternalVersion = 14; //! To be incremented manually for each change to the ARMeilleure project. private const int InternalVersion = 17; //! To be incremented manually for each change to the ARMeilleure project.
private const string BaseDir = "Ryujinx"; private const string BaseDir = "Ryujinx";