Add Rshrn_V & Shrn_V Sse opt.. Add Mla_V, Mls_V & Mul_V Sse opt.; add Tests. (#614)
* Update CountLeadingZeros(). * Remove obsolete Tests. * Follow-up. * Follow-up. * Follow-up. * Add Mla_V, Mls_V & Mul_V Tests. * Update PackageReferences. * Remove EmitLd/Stvectmp2(). * Remove Dup. Nits. * Remove EmitLd/Stvectmp2() & Dup; nits. * Remove Tmp stuff & Dup; rework Fcvtz() as Fcvtn(). * Remove Tmp stuff, EmitLd/Stvectmp2() & Dup. Nits. * Add (R)shrn_V Sse opt.; add "Part" & "Shift" opt.. Remove Tmp stuff; remove Dup. Nits. * Add Mla/Mls/Mul_V Sse opt.. Add "Part" opt.. Remove EmitLd/Stvectmp2(), remove Dup. Nits. * Nits. * Nits. * Nit. * Add "Part" opt.. Nit. * Nit. * Nit. * Add Cmhi_V & Cmhs_V Sse opt..
This commit is contained in:
parent
a0aecd1ff8
commit
1bef70c068
14 changed files with 707 additions and 484 deletions
|
@ -592,12 +592,9 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
emit();
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
|
@ -898,20 +895,13 @@ namespace ChocolArm64.Instructions
|
|||
Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp2();
|
||||
|
||||
context.EmitLdc_I4(2 << 6 | 0 << 4 | 2 << 2 | 0 << 0);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(3 << 6 | 1 << 4 | 3 << 2 | 1 << 0);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
|
@ -926,19 +916,12 @@ namespace ChocolArm64.Instructions
|
|||
Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp2();
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), types));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types));
|
||||
|
||||
|
@ -985,11 +968,6 @@ namespace ChocolArm64.Instructions
|
|||
int bytes = op.GetBitsCount() >> 3;
|
||||
int elems = !scalar ? bytes >> op.Size : 1;
|
||||
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroLowerTmp(context);
|
||||
}
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
EmitVectorExtractSx(context, op.Rn, index, op.Size);
|
||||
|
@ -1005,13 +983,15 @@ namespace ChocolArm64.Instructions
|
|||
EmitUnarySignedSatQAbsOrNeg(context);
|
||||
}
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroAll(context, op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
|
@ -1052,11 +1032,6 @@ namespace ChocolArm64.Instructions
|
|||
int bytes = op.GetBitsCount() >> 3;
|
||||
int elems = !scalar ? bytes >> op.Size : 1;
|
||||
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroLowerTmp(context);
|
||||
}
|
||||
|
||||
if (add || sub)
|
||||
{
|
||||
for (int index = 0; index < elems; index++)
|
||||
|
@ -1082,7 +1057,12 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
}
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroAll(context, op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
}
|
||||
else if (accumulate)
|
||||
|
@ -1103,7 +1083,12 @@ namespace ChocolArm64.Instructions
|
|||
EmitBinarySatQAccumulate(context, signed);
|
||||
}
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroAll(context, op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1117,14 +1102,16 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
EmitSatQ(context, op.Size, true, signed);
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroAll(context, op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
}
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
|
@ -1190,7 +1177,7 @@ namespace ChocolArm64.Instructions
|
|||
// TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
|
||||
public static void EmitSatQ(ILEmitterCtx context, int sizeDst, bool signedSrc, bool signedDst)
|
||||
{
|
||||
if ((uint)sizeDst > 2)
|
||||
if ((uint)sizeDst > 2u)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(sizeDst));
|
||||
}
|
||||
|
@ -1381,15 +1368,15 @@ namespace ChocolArm64.Instructions
|
|||
if (Optimizations.UseSse)
|
||||
{
|
||||
//TODO: Use Sse2.MoveScalar once it is fixed,
|
||||
//as of the time of writing it just crashes the JIT (SDK 2.1.503).
|
||||
//as of the time of writing it just crashes the JIT (SDK 2.1.504).
|
||||
|
||||
/*Type[] typesMov = new Type[] { typeof(Vector128<ulong>) };
|
||||
|
||||
EmitLdvecWithUnsignedCast(context, reg, 3);
|
||||
context.EmitLdvec(reg);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), typesMov));
|
||||
|
||||
EmitStvecWithUnsignedCast(context, reg, 3);*/
|
||||
context.EmitStvec(reg);*/
|
||||
|
||||
context.EmitLdvec(reg);
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue