Add Rshrn_V & Shrn_V Sse opt.. Add Mla_V, Mls_V & Mul_V Sse opt.; add Tests. (#614)

* Update CountLeadingZeros().

* Remove obsolete Tests.

* Follow-up.

* Follow-up.

* Follow-up.

* Add Mla_V, Mls_V & Mul_V Tests.

* Update PackageReferences.

* Remove EmitLd/Stvectmp2().

* Remove Dup. Nits.

* Remove EmitLd/Stvectmp2() & Dup; nits.

* Remove Tmp stuff & Dup; rework Fcvtz() as Fcvtn().

* Remove Tmp stuff, EmitLd/Stvectmp2() & Dup. Nits.

* Add (R)shrn_V Sse opt.; add "Part" & "Shift" opt..

Remove Tmp stuff; remove Dup.
Nits.

* Add Mla/Mls/Mul_V Sse opt.. Add "Part" opt..

Remove EmitLd/Stvectmp2(), remove Dup.
Nits.

* Nits.

* Nits.

* Nit.

* Add "Part" opt.. Nit.

* Nit.

* Nit.

* Add Cmhi_V & Cmhs_V Sse opt..
This commit is contained in:
LDj3SNuD 2019-03-13 09:23:52 +01:00 committed by jduncanator
parent a0aecd1ff8
commit 1bef70c068
14 changed files with 707 additions and 484 deletions

View file

@ -1,4 +1,5 @@
// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
// https://www.agner.org/optimize/#vectorclass @ vectori128.h
using ChocolArm64.Decoders;
using ChocolArm64.State;
@ -184,8 +185,8 @@ namespace ChocolArm64.Instructions
if (sizeF == 0)
{
Type[] typesSsv = new Type[] { typeof(float) };
Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
Type[] typesSsv = new Type[] { typeof(float) };
Type[] typesSubAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
@ -193,8 +194,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAnt));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt));
context.EmitStvec(op.Rd);
@ -202,8 +203,8 @@ namespace ChocolArm64.Instructions
}
else /* if (sizeF == 1) */
{
Type[] typesSsv = new Type[] { typeof(double) };
Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
Type[] typesSsv = new Type[] { typeof(double) };
Type[] typesSubAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
@ -211,8 +212,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAnt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt));
context.EmitStvec(op.Rd);
@ -240,8 +241,8 @@ namespace ChocolArm64.Instructions
if (sizeF == 0)
{
Type[] typesSav = new Type[] { typeof(float) };
Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
Type[] typesSav = new Type[] { typeof(float) };
Type[] typesSubAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
@ -249,8 +250,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAnt));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt));
context.EmitStvec(op.Rd);
@ -261,8 +262,8 @@ namespace ChocolArm64.Instructions
}
else /* if (sizeF == 1) */
{
Type[] typesSav = new Type[] { typeof(double) };
Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
Type[] typesSav = new Type[] { typeof(double) };
Type[] typesSubAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
@ -270,8 +271,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAnt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt));
context.EmitStvec(op.Rd);
}
@ -295,15 +296,15 @@ namespace ChocolArm64.Instructions
if (op.Size == 0)
{
Type[] typesSsv = new Type[] { typeof(float) };
Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
Type[] typesSsv = new Type[] { typeof(float) };
Type[] typesAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt));
context.EmitStvec(op.Rd);
@ -311,15 +312,15 @@ namespace ChocolArm64.Instructions
}
else /* if (op.Size == 1) */
{
Type[] typesSsv = new Type[] { typeof(double) };
Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
Type[] typesSsv = new Type[] { typeof(double) };
Type[] typesAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
context.EmitStvec(op.Rd);
@ -345,15 +346,15 @@ namespace ChocolArm64.Instructions
if (sizeF == 0)
{
Type[] typesSav = new Type[] { typeof(float) };
Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
Type[] typesSav = new Type[] { typeof(float) };
Type[] typesAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt));
context.EmitStvec(op.Rd);
@ -364,15 +365,15 @@ namespace ChocolArm64.Instructions
}
else /* if (sizeF == 1) */
{
Type[] typesSav = new Type[] { typeof(double) };
Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
Type[] typesSav = new Type[] { typeof(double) };
Type[] typesAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
context.EmitStvec(op.Rd);
}
@ -429,7 +430,7 @@ namespace ChocolArm64.Instructions
Type[] typesAddH = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
@ -442,7 +443,7 @@ namespace ChocolArm64.Instructions
Type[] typesAddH = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
@ -748,11 +749,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
@ -770,11 +773,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
@ -863,11 +868,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));
@ -885,11 +892,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
@ -1000,11 +1009,13 @@ namespace ChocolArm64.Instructions
Type[] typesMul = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul));
context.EmitStvec(op.Rd);
@ -1020,11 +1031,13 @@ namespace ChocolArm64.Instructions
Type[] typesMul = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul));
context.EmitStvec(op.Rd);
@ -1772,11 +1785,18 @@ namespace ChocolArm64.Instructions
public static void Mla_V(ILEmitterCtx context)
{
EmitVectorTernaryOpZx(context, () =>
if (Optimizations.UseSse41)
{
context.Emit(OpCodes.Mul);
context.Emit(OpCodes.Add);
});
EmitSse41Mul_AddSub(context, nameof(Sse2.Add));
}
else
{
EmitVectorTernaryOpZx(context, () =>
{
context.Emit(OpCodes.Mul);
context.Emit(OpCodes.Add);
});
}
}
public static void Mla_Ve(ILEmitterCtx context)
@ -1790,11 +1810,18 @@ namespace ChocolArm64.Instructions
public static void Mls_V(ILEmitterCtx context)
{
EmitVectorTernaryOpZx(context, () =>
if (Optimizations.UseSse41)
{
context.Emit(OpCodes.Mul);
context.Emit(OpCodes.Sub);
});
EmitSse41Mul_AddSub(context, nameof(Sse2.Subtract));
}
else
{
EmitVectorTernaryOpZx(context, () =>
{
context.Emit(OpCodes.Mul);
context.Emit(OpCodes.Sub);
});
}
}
public static void Mls_Ve(ILEmitterCtx context)
@ -1808,7 +1835,14 @@ namespace ChocolArm64.Instructions
public static void Mul_V(ILEmitterCtx context)
{
EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
if (Optimizations.UseSse41)
{
EmitSse41Mul_AddSub(context);
}
else
{
EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
}
}
public static void Mul_Ve(ILEmitterCtx context)
@ -1923,19 +1957,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -1969,13 +2007,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -1999,25 +2038,19 @@ namespace ChocolArm64.Instructions
Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitStvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
context.EmitLdc_I4(1);
context.Emit(OpCodes.Ldc_I4_1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
context.EmitStvec(op.Rd);
@ -2185,20 +2218,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@ -2244,20 +2281,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@ -2441,19 +2482,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -2482,13 +2527,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -2594,19 +2640,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -2659,13 +2709,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -2689,25 +2740,19 @@ namespace ChocolArm64.Instructions
Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitStvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
context.EmitLdc_I4(1);
context.Emit(OpCodes.Ldc_I4_1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
context.EmitStvec(op.Rd);
@ -2737,8 +2782,7 @@ namespace ChocolArm64.Instructions
Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub));
@ -2862,20 +2906,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@ -2921,20 +2969,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@ -3063,19 +3115,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -3104,13 +3160,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -3253,5 +3310,77 @@ namespace ChocolArm64.Instructions
EmitVectorZeroUpper(context, op.Rd);
}
}
private static void EmitSse41Mul_AddSub(ILEmitterCtx context, string nameAddSub = null)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
if (nameAddSub != null)
{
context.EmitLdvec(op.Rd);
}
if (op.Size == 0)
{
Type[] typesBle = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
Type[] typesMul = new Type[] { typeof(Vector128<short>), typeof(Vector128<short>) };
Type[] typesShs = new Type[] { typeof(Vector128<short>), typeof(byte) };
Type[] typesSav = new Type[] { typeof(int) };
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
context.EmitLdc_I4(0x00FF00FF);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.BlendVariable), typesBle));
}
else if (op.Size == 1)
{
Type[] typesMul = new Type[] { typeof(Vector128<short>), typeof(Vector128<short>) };
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
}
else /* if (op.Size == 2) */
{
Type[] typesMul = new Type[] { typeof(Vector128<int>), typeof(Vector128<int>) };
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.MultiplyLow), typesMul));
}
if (nameAddSub != null)
{
Type[] typesAddSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
context.EmitCall(typeof(Sse2).GetMethod(nameAddSub, typesAddSub));
}
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
}
}