From 1bef70c068f8aeb6a3a518b8ca635de19122da14 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Wed, 13 Mar 2019 09:23:52 +0100 Subject: [PATCH] Add Rshrn_V & Shrn_V Sse opt.. Add Mla_V, Mls_V & Mul_V Sse opt.; add Tests. (#614) * Update CountLeadingZeros(). * Remove obsolete Tests. * Follow-up. * Follow-up. * Follow-up. * Add Mla_V, Mls_V & Mul_V Tests. * Update PackageReferences. * Remove EmitLd/Stvectmp2(). * Remove Dup. Nits. * Remove EmitLd/Stvectmp2() & Dup; nits. * Remove Tmp stuff & Dup; rework Fcvtz() as Fcvtn(). * Remove Tmp stuff, EmitLd/Stvectmp2() & Dup. Nits. * Add (R)shrn_V Sse opt.; add "Part" & "Shift" opt.. Remove Tmp stuff; remove Dup. Nits. * Add Mla/Mls/Mul_V Sse opt.. Add "Part" opt.. Remove EmitLd/Stvectmp2(), remove Dup. Nits. * Nits. * Nits. * Nit. * Add "Part" opt.. Nit. * Nit. * Nit. * Add Cmhi_V & Cmhs_V Sse opt.. --- .../Instructions/InstEmitSimdArithmetic.cs | 407 ++++++++++++------ ChocolArm64/Instructions/InstEmitSimdCmp.cs | 109 +++-- ChocolArm64/Instructions/InstEmitSimdCvt.cs | 195 +++------ .../Instructions/InstEmitSimdHelper.cs | 83 ++-- .../Instructions/InstEmitSimdLogical.cs | 33 +- ChocolArm64/Instructions/InstEmitSimdShift.cs | 187 +++++--- ChocolArm64/Translation/ILEmitterCtx.cs | 6 +- Ryujinx.Common/Utilities/BitUtils.cs | 4 +- Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs | 34 -- Ryujinx.Tests/Cpu/CpuTestSimdExt.cs | 7 +- Ryujinx.Tests/Cpu/CpuTestSimdIns.cs | 11 +- Ryujinx.Tests/Cpu/CpuTestSimdReg.cs | 64 +++ Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs | 47 +- Ryujinx.Tests/Ryujinx.Tests.csproj | 4 +- 14 files changed, 707 insertions(+), 484 deletions(-) diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs index f7236e9a4a..5ceea77491 100644 --- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs @@ -1,4 +1,5 @@ // https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h +// https://www.agner.org/optimize/#vectorclass @ vectori128.h using ChocolArm64.Decoders; using ChocolArm64.State; @@ -184,8 +185,8 @@ namespace ChocolArm64.Instructions if (sizeF == 0) { - Type[] typesSsv = new Type[] { typeof(float) }; - Type[] typesSubAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesSubAnt = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdc_R4(-0f); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); @@ -193,8 +194,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot)); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAnt)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt)); context.EmitStvec(op.Rd); @@ -202,8 +203,8 @@ namespace ChocolArm64.Instructions } else /* if (sizeF == 1) */ { - Type[] typesSsv = new Type[] { typeof(double) }; - Type[] typesSubAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesSubAnt = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); @@ -211,8 +212,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAnt)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt)); context.EmitStvec(op.Rd); @@ -240,8 +241,8 @@ namespace ChocolArm64.Instructions if (sizeF == 0) { - Type[] typesSav = new Type[] { typeof(float) }; - Type[] typesSubAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesSubAnt = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdc_R4(-0f); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); @@ -249,8 +250,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot)); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAnt)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt)); context.EmitStvec(op.Rd); @@ -261,8 +262,8 @@ namespace ChocolArm64.Instructions } else /* if (sizeF == 1) */ { - Type[] typesSav = new Type[] { typeof(double) }; - Type[] typesSubAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesSubAnt = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); @@ -270,8 +271,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAnt)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt)); context.EmitStvec(op.Rd); } @@ -295,15 +296,15 @@ namespace ChocolArm64.Instructions if (op.Size == 0) { - Type[] typesSsv = new Type[] { typeof(float) }; - Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdc_R4(-0f); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); context.EmitLdvec(op.Rn); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt)); context.EmitStvec(op.Rd); @@ -311,15 +312,15 @@ namespace ChocolArm64.Instructions } else /* if (op.Size == 1) */ { - Type[] typesSsv = new Type[] { typeof(double) }; - Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); context.EmitLdvec(op.Rn); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt)); context.EmitStvec(op.Rd); @@ -345,15 +346,15 @@ namespace ChocolArm64.Instructions if (sizeF == 0) { - Type[] typesSav = new Type[] { typeof(float) }; - Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdc_R4(-0f); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); context.EmitLdvec(op.Rn); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt)); context.EmitStvec(op.Rd); @@ -364,15 +365,15 @@ namespace ChocolArm64.Instructions } else /* if (sizeF == 1) */ { - Type[] typesSav = new Type[] { typeof(double) }; - Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitLdvec(op.Rn); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt)); context.EmitStvec(op.Rd); } @@ -429,7 +430,7 @@ namespace ChocolArm64.Instructions Type[] typesAddH = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH)); @@ -442,7 +443,7 @@ namespace ChocolArm64.Instructions Type[] typesAddH = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH)); @@ -748,11 +749,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd)); @@ -770,11 +773,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 1); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); @@ -863,11 +868,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub)); @@ -885,11 +892,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 1); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); @@ -1000,11 +1009,13 @@ namespace ChocolArm64.Instructions Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul)); context.EmitStvec(op.Rd); @@ -1020,11 +1031,13 @@ namespace ChocolArm64.Instructions Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 1); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul)); context.EmitStvec(op.Rd); @@ -1772,11 +1785,18 @@ namespace ChocolArm64.Instructions public static void Mla_V(ILEmitterCtx context) { - EmitVectorTernaryOpZx(context, () => + if (Optimizations.UseSse41) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Add); - }); + EmitSse41Mul_AddSub(context, nameof(Sse2.Add)); + } + else + { + EmitVectorTernaryOpZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Add); + }); + } } public static void Mla_Ve(ILEmitterCtx context) @@ -1790,11 +1810,18 @@ namespace ChocolArm64.Instructions public static void Mls_V(ILEmitterCtx context) { - EmitVectorTernaryOpZx(context, () => + if (Optimizations.UseSse41) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Sub); - }); + EmitSse41Mul_AddSub(context, nameof(Sse2.Subtract)); + } + else + { + EmitVectorTernaryOpZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Sub); + }); + } } public static void Mls_Ve(ILEmitterCtx context) @@ -1808,7 +1835,14 @@ namespace ChocolArm64.Instructions public static void Mul_V(ILEmitterCtx context) { - EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul)); + if (Optimizations.UseSse41) + { + EmitSse41Mul_AddSub(context); + } + else + { + EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul)); + } } public static void Mul_Ve(ILEmitterCtx context) @@ -1923,19 +1957,23 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -1969,13 +2007,14 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -1999,25 +2038,19 @@ namespace ChocolArm64.Instructions Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; context.EmitLdvec(op.Rn); - - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); - context.EmitStvectmp2(); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd)); - context.EmitLdvectmp(); - context.EmitLdvectmp2(); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd)); - context.EmitLdc_I4(1); + context.Emit(OpCodes.Ldc_I4_1); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); context.EmitStvec(op.Rd); @@ -2185,20 +2218,24 @@ namespace ChocolArm64.Instructions ? nameof(Sse41.ConvertToVector128Int16) : nameof(Sse41.ConvertToVector128Int32); - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); @@ -2244,20 +2281,24 @@ namespace ChocolArm64.Instructions ? nameof(Sse41.ConvertToVector128Int16) : nameof(Sse41.ConvertToVector128Int32); - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); @@ -2441,19 +2482,23 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -2482,13 +2527,14 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -2594,19 +2640,23 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -2659,13 +2709,14 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -2689,25 +2740,19 @@ namespace ChocolArm64.Instructions Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; context.EmitLdvec(op.Rn); - - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); - context.EmitStvectmp2(); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd)); - context.EmitLdvectmp(); - context.EmitLdvectmp2(); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd)); - context.EmitLdc_I4(1); + context.Emit(OpCodes.Ldc_I4_1); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); context.EmitStvec(op.Rd); @@ -2737,8 +2782,7 @@ namespace ChocolArm64.Instructions Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); - + context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub)); @@ -2862,20 +2906,24 @@ namespace ChocolArm64.Instructions ? nameof(Sse41.ConvertToVector128Int16) : nameof(Sse41.ConvertToVector128Int32); - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); @@ -2921,20 +2969,24 @@ namespace ChocolArm64.Instructions ? nameof(Sse41.ConvertToVector128Int16) : nameof(Sse41.ConvertToVector128Int32); - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); @@ -3063,19 +3115,23 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -3104,13 +3160,14 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -3253,5 +3310,77 @@ namespace ChocolArm64.Instructions EmitVectorZeroUpper(context, op.Rd); } } + + private static void EmitSse41Mul_AddSub(ILEmitterCtx context, string nameAddSub = null) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (nameAddSub != null) + { + context.EmitLdvec(op.Rd); + } + + if (op.Size == 0) + { + Type[] typesBle = new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }; + Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesShs = new Type[] { typeof(Vector128), typeof(byte) }; + Type[] typesSav = new Type[] { typeof(int) }; + + context.EmitLdvec(op.Rn); + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitLdvec(op.Rm); + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul)); + + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul)); + + context.EmitLdc_I4(0x00FF00FF); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.BlendVariable), typesBle)); + } + else if (op.Size == 1) + { + Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul)); + } + else /* if (op.Size == 2) */ + { + Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.MultiplyLow), typesMul)); + } + + if (nameAddSub != null) + { + Type[] typesAddSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + context.EmitCall(typeof(Sse2).GetMethod(nameAddSub, typesAddSub)); + } + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } } } diff --git a/ChocolArm64/Instructions/InstEmitSimdCmp.cs b/ChocolArm64/Instructions/InstEmitSimdCmp.cs index c29dcd9dc5..62cf772091 100644 --- a/ChocolArm64/Instructions/InstEmitSimdCmp.cs +++ b/ChocolArm64/Instructions/InstEmitSimdCmp.cs @@ -86,7 +86,42 @@ namespace ChocolArm64.Instructions public static void Cmhi_V(ILEmitterCtx context) { - EmitCmpOp(context, OpCodes.Bgt_Un_S, scalar: false); + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 3) + { + Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] }; + Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSav = new Type[] { typeof(byte) }; + + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + context.EmitLdvec(op.Rm); + context.EmitLdvec(op.Rn); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax)); + + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmp)); + + context.EmitLdc_I4(byte.MaxValue); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitCmpOp(context, OpCodes.Bgt_Un_S, scalar: false); + } } public static void Cmhs_S(ILEmitterCtx context) @@ -96,7 +131,35 @@ namespace ChocolArm64.Instructions public static void Cmhs_V(ILEmitterCtx context) { - EmitCmpOp(context, OpCodes.Bge_Un_S, scalar: false); + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 3) + { + Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] }; + + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmp)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitCmpOp(context, OpCodes.Bge_Un_S, scalar: false); + } } public static void Cmle_S(ILEmitterCtx context) @@ -318,9 +381,6 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - if (cmpWithZero) { VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); @@ -331,7 +391,7 @@ namespace ChocolArm64.Instructions } context.Emit(OpCodes.Dup); - context.EmitStvectmp2(); + context.EmitStvectmp(); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrderedScalar), typesCmp)); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); @@ -340,18 +400,18 @@ namespace ChocolArm64.Instructions context.Emit(OpCodes.Brtrue_S, lblNaN); - context.EmitLdc_I4(0); + context.Emit(OpCodes.Ldc_I4_0); + context.EmitLdvec(op.Rn); context.EmitLdvectmp(); - context.EmitLdvectmp2(); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThanOrEqualOrderedScalar), typesCmp)); + context.EmitLdvec(op.Rn); context.EmitLdvectmp(); - context.EmitLdvectmp2(); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp)); + context.EmitLdvec(op.Rn); context.EmitLdvectmp(); - context.EmitLdvectmp2(); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareLessThanOrderedScalar), typesCmp)); context.EmitStflg((int)PState.NBit); @@ -363,10 +423,10 @@ namespace ChocolArm64.Instructions context.MarkLabel(lblNaN); - context.EmitLdc_I4(1); - context.Emit(OpCodes.Dup); - context.EmitLdc_I4(0); - context.Emit(OpCodes.Dup); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Ldc_I4_0); + context.Emit(OpCodes.Ldc_I4_0); context.EmitStflg((int)PState.NBit); context.EmitStflg((int)PState.ZBit); @@ -384,9 +444,6 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - if (cmpWithZero) { VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); @@ -397,7 +454,7 @@ namespace ChocolArm64.Instructions } context.Emit(OpCodes.Dup); - context.EmitStvectmp2(); + context.EmitStvectmp(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrderedScalar), typesCmp)); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); @@ -406,18 +463,18 @@ namespace ChocolArm64.Instructions context.Emit(OpCodes.Brtrue_S, lblNaN); - context.EmitLdc_I4(0); + context.Emit(OpCodes.Ldc_I4_0); + context.EmitLdvec(op.Rn); context.EmitLdvectmp(); - context.EmitLdvectmp2(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThanOrEqualOrderedScalar), typesCmp)); + context.EmitLdvec(op.Rn); context.EmitLdvectmp(); - context.EmitLdvectmp2(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp)); + context.EmitLdvec(op.Rn); context.EmitLdvectmp(); - context.EmitLdvectmp2(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareLessThanOrderedScalar), typesCmp)); context.EmitStflg((int)PState.NBit); @@ -429,10 +486,10 @@ namespace ChocolArm64.Instructions context.MarkLabel(lblNaN); - context.EmitLdc_I4(1); - context.Emit(OpCodes.Dup); - context.EmitLdc_I4(0); - context.Emit(OpCodes.Dup); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Ldc_I4_0); + context.Emit(OpCodes.Ldc_I4_0); context.EmitStflg((int)PState.NBit); context.EmitStflg((int)PState.ZBit); diff --git a/ChocolArm64/Instructions/InstEmitSimdCvt.cs b/ChocolArm64/Instructions/InstEmitSimdCvt.cs index 78a86a33eb..c5c61bcca5 100644 --- a/ChocolArm64/Instructions/InstEmitSimdCvt.cs +++ b/ChocolArm64/Instructions/InstEmitSimdCvt.cs @@ -21,26 +21,24 @@ namespace ChocolArm64.Instructions if (op.Size == 1 && op.Opc == 0) { //Double -> Single. - VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + Type[] typesCvt = new Type[] { typeof(Vector128), typeof(Vector128) }; + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitLdvec(op.Rn); - Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) }; - - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Single), types)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Single), typesCvt)); context.EmitStvec(op.Rd); } else if (op.Size == 0 && op.Opc == 1) { //Single -> Double. - VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); + Type[] typesCvt = new Type[] { typeof(Vector128), typeof(Vector128) }; + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); context.EmitLdvec(op.Rn); - Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) }; - - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), types)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), typesCvt)); context.EmitStvec(op.Rd); } @@ -80,14 +78,14 @@ namespace ChocolArm64.Instructions { Type[] typesCvt = new Type[] { typeof(Vector128) }; - string nameMov = op.RegisterSize == RegisterSize.Simd128 - ? nameof(Sse.MoveHighToLow) - : nameof(Sse.MoveLowToHigh); - context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); - context.EmitCall(typeof(Sse).GetMethod(nameMov)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow))); + } context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesCvt)); @@ -249,12 +247,12 @@ namespace ChocolArm64.Instructions public static void Fcvtzs_S(ILEmitterCtx context) { - EmitScalarFcvtzs(context); + EmitFcvtz(context, signed: true, scalar: true); } public static void Fcvtzs_V(ILEmitterCtx context) { - EmitVectorFcvtzs(context); + EmitFcvtz(context, signed: true, scalar: false); } public static void Fcvtzu_Gp(ILEmitterCtx context) @@ -269,12 +267,12 @@ namespace ChocolArm64.Instructions public static void Fcvtzu_S(ILEmitterCtx context) { - EmitScalarFcvtzu(context); + EmitFcvtz(context, signed: false, scalar: true); } public static void Fcvtzu_V(ILEmitterCtx context) { - EmitVectorFcvtzu(context); + EmitFcvtz(context, signed: false, scalar: false); } public static void Scvtf_Gp(ILEmitterCtx context) @@ -415,11 +413,6 @@ namespace ChocolArm64.Instructions int bytes = op.GetBitsCount() >> 3; int elems = !scalar ? bytes >> sizeI : 1; - if (scalar && (sizeF == 0)) - { - EmitVectorZeroLowerTmp(context); - } - for (int index = 0; index < elems; index++) { EmitVectorExtractF(context, op.Rn, index, sizeF); @@ -441,13 +434,62 @@ namespace ChocolArm64.Instructions : nameof(VectorHelper.SatF64ToU64)); } - EmitVectorInsertTmp(context, index, sizeI); + if (scalar) + { + EmitVectorZeroAll(context, op.Rd); + } + + EmitVectorInsert(context, op.Rd, index, sizeI); } - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } - if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + private static void EmitFcvtz(ILEmitterCtx context, bool signed, bool scalar) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + int fBits = GetFBits(context); + + int bytes = op.GetBitsCount() >> 3; + int elems = !scalar ? bytes >> sizeI : 1; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractF(context, op.Rn, index, sizeF); + + EmitF2iFBitsMul(context, sizeF, fBits); + + if (sizeF == 0) + { + VectorHelper.EmitCall(context, signed + ? nameof(VectorHelper.SatF32ToS32) + : nameof(VectorHelper.SatF32ToU32)); + + context.Emit(OpCodes.Conv_U8); + } + else /* if (sizeF == 1) */ + { + VectorHelper.EmitCall(context, signed + ? nameof(VectorHelper.SatF64ToS64) + : nameof(VectorHelper.SatF64ToU64)); + } + + if (scalar) + { + EmitVectorZeroAll(context, op.Rd); + } + + EmitVectorInsert(context, op.Rd, index, sizeI); + } + + if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } @@ -555,105 +597,6 @@ namespace ChocolArm64.Instructions } } - private static void EmitScalarFcvtzs(ILEmitterCtx context) - { - EmitScalarFcvtz(context, true); - } - - private static void EmitScalarFcvtzu(ILEmitterCtx context) - { - EmitScalarFcvtz(context, false); - } - - private static void EmitScalarFcvtz(ILEmitterCtx context, bool signed) - { - OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - - int sizeF = op.Size & 1; - int sizeI = sizeF + 2; - - int fBits = GetFBits(context); - - EmitVectorExtractF(context, op.Rn, 0, sizeF); - - EmitF2iFBitsMul(context, sizeF, fBits); - - if (sizeF == 0) - { - VectorHelper.EmitCall(context, signed - ? nameof(VectorHelper.SatF32ToS32) - : nameof(VectorHelper.SatF32ToU32)); - } - else /* if (sizeF == 1) */ - { - VectorHelper.EmitCall(context, signed - ? nameof(VectorHelper.SatF64ToS64) - : nameof(VectorHelper.SatF64ToU64)); - } - - if (sizeF == 0) - { - context.Emit(OpCodes.Conv_U8); - } - - EmitScalarSet(context, op.Rd, sizeI); - } - - private static void EmitVectorFcvtzs(ILEmitterCtx context) - { - EmitVectorFcvtz(context, true); - } - - private static void EmitVectorFcvtzu(ILEmitterCtx context) - { - EmitVectorFcvtz(context, false); - } - - private static void EmitVectorFcvtz(ILEmitterCtx context, bool signed) - { - OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - - int sizeF = op.Size & 1; - int sizeI = sizeF + 2; - - int fBits = GetFBits(context); - - int bytes = op.GetBitsCount() >> 3; - int elems = bytes >> sizeI; - - for (int index = 0; index < elems; index++) - { - EmitVectorExtractF(context, op.Rn, index, sizeF); - - EmitF2iFBitsMul(context, sizeF, fBits); - - if (sizeF == 0) - { - VectorHelper.EmitCall(context, signed - ? nameof(VectorHelper.SatF32ToS32) - : nameof(VectorHelper.SatF32ToU32)); - } - else /* if (sizeF == 1) */ - { - VectorHelper.EmitCall(context, signed - ? nameof(VectorHelper.SatF64ToS64) - : nameof(VectorHelper.SatF64ToU64)); - } - - if (sizeF == 0) - { - context.Emit(OpCodes.Conv_U8); - } - - EmitVectorInsert(context, op.Rd, index, sizeI); - } - - if (op.RegisterSize == RegisterSize.Simd64) - { - EmitVectorZeroUpper(context, op.Rd); - } - } - private static int GetFBits(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdShImm64 op) diff --git a/ChocolArm64/Instructions/InstEmitSimdHelper.cs b/ChocolArm64/Instructions/InstEmitSimdHelper.cs index b7dd09b4bb..10b86a3e17 100644 --- a/ChocolArm64/Instructions/InstEmitSimdHelper.cs +++ b/ChocolArm64/Instructions/InstEmitSimdHelper.cs @@ -592,12 +592,9 @@ namespace ChocolArm64.Instructions emit(); - EmitVectorInsertTmp(context, index, op.Size); + EmitVectorInsert(context, op.Rd, index, op.Size); } - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); - if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); @@ -898,20 +895,13 @@ namespace ChocolArm64.Instructions Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rn); - - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); - context.EmitStvectmp2(); - context.EmitLdc_I4(2 << 6 | 0 << 4 | 2 << 2 | 0 << 0); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); - context.EmitLdvectmp(); - context.EmitLdvectmp2(); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(3 << 6 | 1 << 4 | 3 << 2 | 1 << 0); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); @@ -926,19 +916,12 @@ namespace ChocolArm64.Instructions Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rn); - - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); - context.EmitStvectmp2(); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), types)); - context.EmitLdvectmp(); - context.EmitLdvectmp2(); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types)); @@ -985,11 +968,6 @@ namespace ChocolArm64.Instructions int bytes = op.GetBitsCount() >> 3; int elems = !scalar ? bytes >> op.Size : 1; - if (scalar) - { - EmitVectorZeroLowerTmp(context); - } - for (int index = 0; index < elems; index++) { EmitVectorExtractSx(context, op.Rn, index, op.Size); @@ -1005,13 +983,15 @@ namespace ChocolArm64.Instructions EmitUnarySignedSatQAbsOrNeg(context); } - EmitVectorInsertTmp(context, index, op.Size); + if (scalar) + { + EmitVectorZeroAll(context, op.Rd); + } + + EmitVectorInsert(context, op.Rd, index, op.Size); } - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); - - if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } @@ -1052,11 +1032,6 @@ namespace ChocolArm64.Instructions int bytes = op.GetBitsCount() >> 3; int elems = !scalar ? bytes >> op.Size : 1; - if (scalar) - { - EmitVectorZeroLowerTmp(context); - } - if (add || sub) { for (int index = 0; index < elems; index++) @@ -1082,7 +1057,12 @@ namespace ChocolArm64.Instructions } } - EmitVectorInsertTmp(context, index, op.Size); + if (scalar) + { + EmitVectorZeroAll(context, op.Rd); + } + + EmitVectorInsert(context, op.Rd, index, op.Size); } } else if (accumulate) @@ -1103,7 +1083,12 @@ namespace ChocolArm64.Instructions EmitBinarySatQAccumulate(context, signed); } - EmitVectorInsertTmp(context, index, op.Size); + if (scalar) + { + EmitVectorZeroAll(context, op.Rd); + } + + EmitVectorInsert(context, op.Rd, index, op.Size); } } else @@ -1117,14 +1102,16 @@ namespace ChocolArm64.Instructions EmitSatQ(context, op.Size, true, signed); - EmitVectorInsertTmp(context, index, op.Size); + if (scalar) + { + EmitVectorZeroAll(context, op.Rd); + } + + EmitVectorInsert(context, op.Rd, index, op.Size); } } - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); - - if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } @@ -1190,7 +1177,7 @@ namespace ChocolArm64.Instructions // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned). public static void EmitSatQ(ILEmitterCtx context, int sizeDst, bool signedSrc, bool signedDst) { - if ((uint)sizeDst > 2) + if ((uint)sizeDst > 2u) { throw new ArgumentOutOfRangeException(nameof(sizeDst)); } @@ -1381,15 +1368,15 @@ namespace ChocolArm64.Instructions if (Optimizations.UseSse) { //TODO: Use Sse2.MoveScalar once it is fixed, - //as of the time of writing it just crashes the JIT (SDK 2.1.503). + //as of the time of writing it just crashes the JIT (SDK 2.1.504). /*Type[] typesMov = new Type[] { typeof(Vector128) }; - EmitLdvecWithUnsignedCast(context, reg, 3); + context.EmitLdvec(reg); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), typesMov)); - EmitStvecWithUnsignedCast(context, reg, 3);*/ + context.EmitStvec(reg);*/ context.EmitLdvec(reg); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); diff --git a/ChocolArm64/Instructions/InstEmitSimdLogical.cs b/ChocolArm64/Instructions/InstEmitSimdLogical.cs index 6c718182db..bf80bada3e 100644 --- a/ChocolArm64/Instructions/InstEmitSimdLogical.cs +++ b/ChocolArm64/Instructions/InstEmitSimdLogical.cs @@ -30,12 +30,12 @@ namespace ChocolArm64.Instructions { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rn); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt)); context.EmitStvec(op.Rd); @@ -79,18 +79,18 @@ namespace ChocolArm64.Instructions if (Optimizations.UseSse2) { - Type[] typesXorAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesXorAnd = new Type[] { typeof(Vector128), typeof(Vector128) }; - string nameAndNot = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And); + string nameAnd = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And); context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rd); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot)); - context.EmitCall(typeof(Sse2).GetMethod(nameAndNot, typesXorAndNot)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd)); + context.EmitCall(typeof(Sse2).GetMethod(nameAnd, typesXorAnd)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd)); context.EmitStvec(op.Rd); @@ -120,7 +120,6 @@ namespace ChocolArm64.Instructions } context.Emit(OpCodes.And); - context.Emit(OpCodes.Xor); EmitVectorInsert(context, op.Rd, index, 3); @@ -142,8 +141,7 @@ namespace ChocolArm64.Instructions Type[] typesXorAnd = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); - + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd)); @@ -151,7 +149,6 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesXorAnd)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd)); context.EmitStvec(op.Rd); @@ -196,15 +193,15 @@ namespace ChocolArm64.Instructions { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - Type[] typesSav = new Type[] { typeof(byte) }; - Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSav = new Type[] { typeof(byte) }; + Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rn); context.EmitLdc_I4(byte.MaxValue); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt)); context.EmitStvec(op.Rd); @@ -225,8 +222,8 @@ namespace ChocolArm64.Instructions { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - Type[] typesSav = new Type[] { typeof(byte) }; - Type[] typesAndNotOr = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSav = new Type[] { typeof(byte) }; + Type[] typesAntOr = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); @@ -234,8 +231,8 @@ namespace ChocolArm64.Instructions context.EmitLdc_I4(byte.MaxValue); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNotOr)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndNotOr)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAntOr)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAntOr)); context.EmitStvec(op.Rd); diff --git a/ChocolArm64/Instructions/InstEmitSimdShift.cs b/ChocolArm64/Instructions/InstEmitSimdShift.cs index c0b20d7ea6..6865948ae0 100644 --- a/ChocolArm64/Instructions/InstEmitSimdShift.cs +++ b/ChocolArm64/Instructions/InstEmitSimdShift.cs @@ -5,6 +5,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instructions.InstEmitSimdHelper; @@ -13,9 +14,65 @@ namespace ChocolArm64.Instructions { static partial class InstEmit { +#region "Masks" + private static readonly long[] _masks_RshrnShrn = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 + }; +#endregion + public static void Rshrn_V(ILEmitterCtx context) { - EmitVectorShrImmNarrowOpZx(context, round: true); + if (Optimizations.UseSsse3) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], VectorUIntTypesPerSizeLog2[op.Size + 1] }; + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) }; + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size + 1] }; + Type[] typesSve = new Type[] { typeof(long), typeof(long) }; + + string nameMov = op.RegisterSize == RegisterSize.Simd128 + ? nameof(Sse.MoveLowToHigh) + : nameof(Sse.MoveHighToLow); + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + context.EmitLdvec(op.Rd); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); + + context.EmitLdvec(op.Rn); + + context.EmitLdc_I8(roundConst); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); // value + + context.EmitLdc_I8(_masks_RshrnShrn[op.Size]); // mask + context.Emit(OpCodes.Dup); // mask + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse).GetMethod(nameMov)); + + context.EmitStvec(op.Rd); + } + else + { + EmitVectorShrImmNarrowOpZx(context, round: true); + } } public static void Shl_S(ILEmitterCtx context) @@ -80,12 +137,13 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -102,7 +160,45 @@ namespace ChocolArm64.Instructions public static void Shrn_V(ILEmitterCtx context) { - EmitVectorShrImmNarrowOpZx(context, round: false); + if (Optimizations.UseSsse3) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) }; + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSve = new Type[] { typeof(long), typeof(long) }; + + string nameMov = op.RegisterSize == RegisterSize.Simd128 + ? nameof(Sse.MoveLowToHigh) + : nameof(Sse.MoveHighToLow); + + int shift = GetImmShr(op); + + context.EmitLdvec(op.Rd); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); + + context.EmitLdvec(op.Rn); + + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); // value + + context.EmitLdc_I8(_masks_RshrnShrn[op.Size]); // mask + context.Emit(OpCodes.Dup); // mask + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse).GetMethod(nameMov)); + + context.EmitStvec(op.Rd); + } + else + { + EmitVectorShrImmNarrowOpZx(context, round: false); + } } public static void Sli_V(ILEmitterCtx context) @@ -271,8 +367,7 @@ namespace ChocolArm64.Instructions { OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0 - && op.Size < 3) + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) { Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; @@ -282,16 +377,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdc_I4(eSize - shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); context.EmitLdc_I4(eSize - 1); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); - context.EmitLdvectmp(); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs)); @@ -320,8 +412,7 @@ namespace ChocolArm64.Instructions { OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0 - && op.Size < 3) + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) { Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; @@ -332,16 +423,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdc_I4(eSize - shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); context.EmitLdc_I4(eSize - 1); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); - context.EmitLdvectmp(); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs)); @@ -403,17 +491,21 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - context.EmitLdc_I4(shift); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll)); + if (shift != 0) + { + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll)); + } context.EmitStvec(op.Rd); } @@ -432,8 +524,7 @@ namespace ChocolArm64.Instructions { OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0 - && op.Size < 3) + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) { Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; @@ -464,8 +555,7 @@ namespace ChocolArm64.Instructions { OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0 - && op.Size < 3) + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) { Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; @@ -474,8 +564,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); context.EmitLdc_I4(GetImmShr(op)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); context.EmitStvec(op.Rd); @@ -612,16 +702,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdc_I4(eSize - shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); context.EmitLdc_I4(eSize - 1); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); - context.EmitLdvectmp(); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); @@ -661,16 +748,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdc_I4(eSize - shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); context.EmitLdc_I4(eSize - 1); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); - context.EmitLdvectmp(); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); @@ -732,17 +816,21 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - context.EmitLdc_I4(shift); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll)); + if (shift != 0) + { + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll)); + } context.EmitStvec(op.Rd); } @@ -801,8 +889,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); context.EmitLdc_I4(GetImmShr(op)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); context.EmitStvec(op.Rd); @@ -899,12 +987,9 @@ namespace ChocolArm64.Instructions context.Emit(OpCodes.Add); } - EmitVectorInsertTmp(context, index, op.Size); + EmitVectorInsert(context, op.Rd, index, op.Size); } - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); - if ((op.RegisterSize == RegisterSize.Simd64) || scalar) { EmitVectorZeroUpper(context, op.Rd); @@ -1044,11 +1129,7 @@ namespace ChocolArm64.Instructions } // dst64 = (Int(src64, signed) + roundConst) >> shift; - private static void EmitShrImm64( - ILEmitterCtx context, - bool signed, - long roundConst, - int shift) + private static void EmitShrImm64(ILEmitterCtx context, bool signed, long roundConst, int shift) { context.EmitLdc_I8(roundConst); context.EmitLdc_I4(shift); diff --git a/ChocolArm64/Translation/ILEmitterCtx.cs b/ChocolArm64/Translation/ILEmitterCtx.cs index 91b72b13ae..f39bd37112 100644 --- a/ChocolArm64/Translation/ILEmitterCtx.cs +++ b/ChocolArm64/Translation/ILEmitterCtx.cs @@ -61,8 +61,7 @@ namespace ChocolArm64.Translation //Vectors are part of another "set" of locals. private const int VecGpTmp1Index = ReservedLocalsCount + 0; - private const int VecGpTmp2Index = ReservedLocalsCount + 1; - private const int UserVecTempStart = ReservedLocalsCount + 2; + private const int UserVecTempStart = ReservedLocalsCount + 1; private static int _userIntTempCount; private static int _userVecTempCount; @@ -630,9 +629,6 @@ namespace ChocolArm64.Translation public void EmitLdvectmp() => EmitLdvec(VecGpTmp1Index); public void EmitStvectmp() => EmitStvec(VecGpTmp1Index); - public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index); - public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index); - public void EmitLdint(int index) => Ldloc(index, VarType.Int); public void EmitStint(int index) => Stloc(index, VarType.Int); diff --git a/Ryujinx.Common/Utilities/BitUtils.cs b/Ryujinx.Common/Utilities/BitUtils.cs index b6fba4fba1..5f70f742a0 100644 --- a/Ryujinx.Common/Utilities/BitUtils.cs +++ b/Ryujinx.Common/Utilities/BitUtils.cs @@ -100,7 +100,7 @@ namespace Ryujinx.Common do { nibbleIdx -= 4; - preCount = ClzNibbleTbl[(value >> nibbleIdx) & 0b1111]; + preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111]; count += preCount; } while (preCount == 4); @@ -136,4 +136,4 @@ namespace Ryujinx.Common return (value >> 32) | (value << 32); } } -} \ No newline at end of file +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs index 2f8604ebac..63e0bda83c 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs @@ -8,26 +8,6 @@ namespace Ryujinx.Tests.Cpu { public class CpuTestSimdArithmetic : CpuTest { - [TestCase(0x00000000u, 0x7F800000u)] - [TestCase(0x80000000u, 0xFF800000u)] - [TestCase(0x00FFF000u, 0x7E000000u)] - [TestCase(0x41200000u, 0x3DCC8000u)] - [TestCase(0xC1200000u, 0xBDCC8000u)] - [TestCase(0x001FFFFFu, 0x7F800000u)] - [TestCase(0x007FF000u, 0x7E800000u)] - public void Frecpe_S(uint a, uint result) - { - uint opcode = 0x5EA1D820; // FRECPE S0, S1 - - Vector128 v1 = MakeVectorE0(a); - - CpuThreadState threadState = SingleOpcode(opcode, v1: v1); - - Assert.That(GetVectorE0(threadState.V0), Is.EqualTo(result)); - - CompareAgainstUnicorn(); - } - [TestCase(0x3FE66666u, false, 0x40000000u)] [TestCase(0x3F99999Au, false, 0x3F800000u)] [TestCase(0x404CCCCDu, false, 0x40400000u)] @@ -601,19 +581,5 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - - [TestCase(0x41200000u, 0x3EA18000u)] - public void Frsqrte_S(uint a, uint result) - { - uint opcode = 0x7EA1D820; // FRSQRTE S0, S1 - - Vector128 v1 = MakeVectorE0(a); - - CpuThreadState threadState = SingleOpcode(opcode, v1: v1); - - Assert.That(GetVectorE0(threadState.V0), Is.EqualTo(result)); - - CompareAgainstUnicorn(); - } } } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs index f232989f77..b8548169be 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs @@ -19,7 +19,8 @@ namespace Ryujinx.Tests.Cpu } #endregion - private const int RndCnt = 2; + private const int RndCnt = 2; + private const int RndCntIndex = 2; [Test, Pairwise, Description("EXT .8B, .8B, .8B, #")] public void Ext_V_8B([Values(0u)] uint rd, @@ -28,7 +29,7 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_8B_")] [Random(RndCnt)] ulong z, [ValueSource("_8B_")] [Random(RndCnt)] ulong a, [ValueSource("_8B_")] [Random(RndCnt)] ulong b, - [Range(0u, 7u)] uint index) + [Values(0u, 7u)] [Random(1u, 6u, RndCntIndex)] uint index) { uint imm4 = index & 0x7u; @@ -52,7 +53,7 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_8B_")] [Random(RndCnt)] ulong z, [ValueSource("_8B_")] [Random(RndCnt)] ulong a, [ValueSource("_8B_")] [Random(RndCnt)] ulong b, - [Range(0u, 15u)] uint index) + [Values(0u, 15u)] [Random(1u, 14u, RndCntIndex)] uint index) { uint imm4 = index & 0xFu; diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs index 4ca54a2b42..fe93f06e37 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs @@ -67,7 +67,8 @@ namespace Ryujinx.Tests.Cpu } #endregion - private const int RndCnt = 2; + private const int RndCnt = 2; + private const int RndCntIndex = 2; [Test, Pairwise, Description("DUP ., ")] public void Dup_Gp_W([Values(0u)] uint rd, @@ -109,7 +110,7 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("DUP B0, V1.B[]")] public void Dup_S_B([ValueSource("_8B_")] [Random(RndCnt)] ulong a, - [Range(0u, 15u)] uint index) + [Values(0u, 15u)] [Random(1u, 14u, RndCntIndex)] uint index) { const int size = 0; @@ -129,7 +130,7 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("DUP H0, V1.H[]")] public void Dup_S_H([ValueSource("_4H_")] [Random(RndCnt)] ulong a, - [Range(0u, 7u)] uint index) + [Values(0u, 7u)] [Random(1u, 6u, RndCntIndex)] uint index) { const int size = 1; @@ -192,7 +193,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_8B_")] [Random(RndCnt)] ulong z, [ValueSource("_8B_")] [Random(RndCnt)] ulong a, - [Range(0u, 15u)] uint index, + [Values(0u, 15u)] [Random(1u, 14u, RndCntIndex)] uint index, [Values(0b0u, 0b1u)] uint q) // <8B, 16B> { const int size = 0; @@ -217,7 +218,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_4H_")] [Random(RndCnt)] ulong z, [ValueSource("_4H_")] [Random(RndCnt)] ulong a, - [Range(0u, 7u)] uint index, + [Values(0u, 7u)] [Random(1u, 6u, RndCntIndex)] uint index, [Values(0b0u, 0b1u)] uint q) // <4H, 8H> { const int size = 1; diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index 8d2f4e9a34..1c418341b6 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -434,6 +434,26 @@ namespace Ryujinx.Tests.Cpu }; } + private static uint[] _Mla_Mls_Mul_V_8B_4H_2S_() + { + return new uint[] + { + 0x0E209400u, // MLA V0.8B, V0.8B, V0.8B + 0x2E209400u, // MLS V0.8B, V0.8B, V0.8B + 0x0E209C00u // MUL V0.8B, V0.8B, V0.8B + }; + } + + private static uint[] _Mla_Mls_Mul_V_16B_8H_4S_() + { + return new uint[] + { + 0x4E209400u, // MLA V0.16B, V0.16B, V0.16B + 0x6E209400u, // MLS V0.16B, V0.16B, V0.16B + 0x4E209C00u // MUL V0.16B, V0.16B, V0.16B + }; + } + private static uint[] _Sha1c_Sha1m_Sha1p_Sha1su0_V_() { return new uint[] @@ -1786,6 +1806,50 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(Fpsr.Ioc | Fpsr.Idc, FpSkips.IfUnderflow, FpTolerances.UpToOneUlpsD); } + [Test, Pairwise] + public void Mla_Mls_Mul_V_8B_4H_2S([ValueSource("_Mla_Mls_Mul_V_8B_4H_2S_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= ((size & 3) << 22); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0(a); + Vector128 v2 = MakeVectorE0(b); + + SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void Mla_Mls_Mul_V_16B_8H_4S([ValueSource("_Mla_Mls_Mul_V_16B_8H_4S_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + { + opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= ((size & 3) << 22); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + Vector128 v2 = MakeVectorE0E1(b, b); + + SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + [Test, Pairwise, Description("ORN ., ., .")] public void Orn_V_8B([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs index f026158cc7..9a295d5ed1 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs @@ -258,14 +258,15 @@ namespace Ryujinx.Tests.Cpu } #endregion - private const int RndCnt = 2; + private const int RndCnt = 2; + private const int RndCntShift = 2; [Test, Pairwise, Description("SHL , , #")] public void Shl_S_D([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, [ValueSource("_1D_")] [Random(RndCnt)] ulong z, [ValueSource("_1D_")] [Random(RndCnt)] ulong a, - [Range(0u, 63u)] uint shift) + [Values(0u, 63u)] [Random(1u, 62u, RndCntShift)] uint shift) { uint immHb = (64 + shift) & 0x7F; @@ -286,7 +287,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_8B_")] [Random(RndCnt)] ulong z, [ValueSource("_8B_")] [Random(RndCnt)] ulong a, - [Range(0u, 7u)] uint shift, + [Values(0u, 7u)] [Random(1u, 6u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <8B, 16B> { uint immHb = (8 + shift) & 0x7F; @@ -309,7 +310,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_4H_")] [Random(RndCnt)] ulong z, [ValueSource("_4H_")] [Random(RndCnt)] ulong a, - [Range(0u, 15u)] uint shift, + [Values(0u, 15u)] [Random(1u, 14u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <4H, 8H> { uint immHb = (16 + shift) & 0x7F; @@ -332,7 +333,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_2S_")] [Random(RndCnt)] ulong z, [ValueSource("_2S_")] [Random(RndCnt)] ulong a, - [Range(0u, 31u)] uint shift, + [Values(0u, 31u)] [Random(1u, 30u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <2S, 4S> { uint immHb = (32 + shift) & 0x7F; @@ -355,7 +356,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_1D_")] [Random(RndCnt)] ulong z, [ValueSource("_1D_")] [Random(RndCnt)] ulong a, - [Range(0u, 63u)] uint shift) + [Values(0u, 63u)] [Random(1u, 62u, RndCntShift)] uint shift) { uint immHb = (64 + shift) & 0x7F; @@ -377,7 +378,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_8B_")] [Random(RndCnt)] ulong z, [ValueSource("_8B_")] [Random(RndCnt)] ulong a, - [Range(0u, 7u)] uint shift, + [Values(0u, 7u)] [Random(1u, 6u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <8B8H, 16B8H> { uint immHb = (8 + shift) & 0x7F; @@ -400,7 +401,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_4H_")] [Random(RndCnt)] ulong z, [ValueSource("_4H_")] [Random(RndCnt)] ulong a, - [Range(0u, 15u)] uint shift, + [Values(0u, 15u)] [Random(1u, 14u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <4H4S, 8H4S> { uint immHb = (16 + shift) & 0x7F; @@ -423,7 +424,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_2S_")] [Random(RndCnt)] ulong z, [ValueSource("_2S_")] [Random(RndCnt)] ulong a, - [Range(0u, 31u)] uint shift, + [Values(0u, 31u)] [Random(1u, 30u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <2S2D, 4S2D> { uint immHb = (32 + shift) & 0x7F; @@ -446,7 +447,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_1D_")] [Random(RndCnt)] ulong z, [ValueSource("_1D_")] [Random(RndCnt)] ulong a, - [Range(1u, 64u)] uint shift) + [Values(1u, 64u)] [Random(2u, 63u, RndCntShift)] uint shift) { uint immHb = (128 - shift) & 0x7F; @@ -467,7 +468,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_8B_")] [Random(RndCnt)] ulong z, [ValueSource("_8B_")] [Random(RndCnt)] ulong a, - [Range(1u, 8u)] uint shift, + [Values(1u, 8u)] [Random(2u, 7u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <8B, 16B> { uint immHb = (16 - shift) & 0x7F; @@ -490,7 +491,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_4H_")] [Random(RndCnt)] ulong z, [ValueSource("_4H_")] [Random(RndCnt)] ulong a, - [Range(1u, 16u)] uint shift, + [Values(1u, 16u)] [Random(2u, 15u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <4H, 8H> { uint immHb = (32 - shift) & 0x7F; @@ -513,7 +514,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_2S_")] [Random(RndCnt)] ulong z, [ValueSource("_2S_")] [Random(RndCnt)] ulong a, - [Range(1u, 32u)] uint shift, + [Values(1u, 32u)] [Random(2u, 31u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <2S, 4S> { uint immHb = (64 - shift) & 0x7F; @@ -536,7 +537,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_1D_")] [Random(RndCnt)] ulong z, [ValueSource("_1D_")] [Random(RndCnt)] ulong a, - [Range(1u, 64u)] uint shift) + [Values(1u, 64u)] [Random(2u, 63u, RndCntShift)] uint shift) { uint immHb = (128 - shift) & 0x7F; @@ -557,7 +558,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_4H_")] [Random(RndCnt)] ulong z, [ValueSource("_4H_")] [Random(RndCnt)] ulong a, - [Range(1u, 8u)] uint shift, + [Values(1u, 8u)] [Random(2u, 7u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <8H8B, 8H16B> { uint immHb = (16 - shift) & 0x7F; @@ -580,7 +581,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_2S_")] [Random(RndCnt)] ulong z, [ValueSource("_2S_")] [Random(RndCnt)] ulong a, - [Range(1u, 16u)] uint shift, + [Values(1u, 16u)] [Random(2u, 15u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <4S4H, 4S8H> { uint immHb = (32 - shift) & 0x7F; @@ -603,7 +604,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_1D_")] [Random(RndCnt)] ulong z, [ValueSource("_1D_")] [Random(RndCnt)] ulong a, - [Range(1u, 32u)] uint shift, + [Values(1u, 32u)] [Random(2u, 31u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <2D2S, 2D4S> { uint immHb = (64 - shift) & 0x7F; @@ -626,7 +627,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_1H_")] [Random(RndCnt)] ulong z, [ValueSource("_1H_")] [Random(RndCnt)] ulong a, - [Range(1u, 8u)] uint shift) + [Values(1u, 8u)] [Random(2u, 7u, RndCntShift)] uint shift) { uint immHb = (16 - shift) & 0x7F; @@ -647,7 +648,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_1S_")] [Random(RndCnt)] ulong z, [ValueSource("_1S_")] [Random(RndCnt)] ulong a, - [Range(1u, 16u)] uint shift) + [Values(1u, 16u)] [Random(2u, 15u, RndCntShift)] uint shift) { uint immHb = (32 - shift) & 0x7F; @@ -668,7 +669,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_1D_")] [Random(RndCnt)] ulong z, [ValueSource("_1D_")] [Random(RndCnt)] ulong a, - [Range(1u, 32u)] uint shift) + [Values(1u, 32u)] [Random(2u, 31u, RndCntShift)] uint shift) { uint immHb = (64 - shift) & 0x7F; @@ -689,7 +690,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_4H_")] [Random(RndCnt)] ulong z, [ValueSource("_4H_")] [Random(RndCnt)] ulong a, - [Range(1u, 8u)] uint shift, + [Values(1u, 8u)] [Random(2u, 7u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <8H8B, 8H16B> { uint immHb = (16 - shift) & 0x7F; @@ -712,7 +713,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_2S_")] [Random(RndCnt)] ulong z, [ValueSource("_2S_")] [Random(RndCnt)] ulong a, - [Range(1u, 16u)] uint shift, + [Values(1u, 16u)] [Random(2u, 15u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <4S4H, 4S8H> { uint immHb = (32 - shift) & 0x7F; @@ -735,7 +736,7 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint rn, [ValueSource("_1D_")] [Random(RndCnt)] ulong z, [ValueSource("_1D_")] [Random(RndCnt)] ulong a, - [Range(1u, 32u)] uint shift, + [Values(1u, 32u)] [Random(2u, 31u, RndCntShift)] uint shift, [Values(0b0u, 0b1u)] uint q) // <2D2S, 2D4S> { uint immHb = (64 - shift) & 0x7F; diff --git a/Ryujinx.Tests/Ryujinx.Tests.csproj b/Ryujinx.Tests/Ryujinx.Tests.csproj index 35405c769f..ce94326d24 100644 --- a/Ryujinx.Tests/Ryujinx.Tests.csproj +++ b/Ryujinx.Tests/Ryujinx.Tests.csproj @@ -16,9 +16,9 @@ - + - +