Add Rshrn_V & Shrn_V Sse opt.. Add Mla_V, Mls_V & Mul_V Sse opt.; add Tests. (#614)
* Update CountLeadingZeros(). * Remove obsolete Tests. * Follow-up. * Follow-up. * Follow-up. * Add Mla_V, Mls_V & Mul_V Tests. * Update PackageReferences. * Remove EmitLd/Stvectmp2(). * Remove Dup. Nits. * Remove EmitLd/Stvectmp2() & Dup; nits. * Remove Tmp stuff & Dup; rework Fcvtz() as Fcvtn(). * Remove Tmp stuff, EmitLd/Stvectmp2() & Dup. Nits. * Add (R)shrn_V Sse opt.; add "Part" & "Shift" opt.. Remove Tmp stuff; remove Dup. Nits. * Add Mla/Mls/Mul_V Sse opt.. Add "Part" opt.. Remove EmitLd/Stvectmp2(), remove Dup. Nits. * Nits. * Nits. * Nit. * Add "Part" opt.. Nit. * Nit. * Nit. * Add Cmhi_V & Cmhs_V Sse opt..
This commit is contained in:
parent
a0aecd1ff8
commit
1bef70c068
14 changed files with 707 additions and 484 deletions
|
@ -1,4 +1,5 @@
|
|||
// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
|
||||
// https://www.agner.org/optimize/#vectorclass @ vectori128.h
|
||||
|
||||
using ChocolArm64.Decoders;
|
||||
using ChocolArm64.State;
|
||||
|
@ -184,8 +185,8 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Type[] typesSsv = new Type[] { typeof(float) };
|
||||
Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
Type[] typesSsv = new Type[] { typeof(float) };
|
||||
Type[] typesSubAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdc_R4(-0f);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
|
||||
|
@ -193,8 +194,8 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAnt));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -202,8 +203,8 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
Type[] typesSsv = new Type[] { typeof(double) };
|
||||
Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
Type[] typesSsv = new Type[] { typeof(double) };
|
||||
Type[] typesSubAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdc_R8(-0d);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
|
||||
|
@ -211,8 +212,8 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAnt));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -240,8 +241,8 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Type[] typesSav = new Type[] { typeof(float) };
|
||||
Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
Type[] typesSav = new Type[] { typeof(float) };
|
||||
Type[] typesSubAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdc_R4(-0f);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
|
||||
|
@ -249,8 +250,8 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAnt));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -261,8 +262,8 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
Type[] typesSav = new Type[] { typeof(double) };
|
||||
Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
Type[] typesSav = new Type[] { typeof(double) };
|
||||
Type[] typesSubAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdc_R8(-0d);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
||||
|
@ -270,8 +271,8 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAnt));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
|
@ -295,15 +296,15 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
if (op.Size == 0)
|
||||
{
|
||||
Type[] typesSsv = new Type[] { typeof(float) };
|
||||
Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
Type[] typesSsv = new Type[] { typeof(float) };
|
||||
Type[] typesAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdc_R4(-0f);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -311,15 +312,15 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
else /* if (op.Size == 1) */
|
||||
{
|
||||
Type[] typesSsv = new Type[] { typeof(double) };
|
||||
Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
Type[] typesSsv = new Type[] { typeof(double) };
|
||||
Type[] typesAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdc_R8(-0d);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -345,15 +346,15 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Type[] typesSav = new Type[] { typeof(float) };
|
||||
Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
Type[] typesSav = new Type[] { typeof(float) };
|
||||
Type[] typesAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdc_R4(-0f);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -364,15 +365,15 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
Type[] typesSav = new Type[] { typeof(double) };
|
||||
Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
Type[] typesSav = new Type[] { typeof(double) };
|
||||
Type[] typesAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdc_R8(-0d);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
|
@ -429,7 +430,7 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesAddH = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
|
||||
|
||||
|
@ -442,7 +443,7 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesAddH = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
|
||||
|
||||
|
@ -748,11 +749,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
|
||||
|
||||
|
@ -770,11 +773,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
|
||||
|
||||
|
@ -863,11 +868,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));
|
||||
|
||||
|
@ -885,11 +892,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
|
||||
|
||||
|
@ -1000,11 +1009,13 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesMul = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
@ -1020,11 +1031,13 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesMul = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
@ -1772,11 +1785,18 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
public static void Mla_V(ILEmitterCtx context)
|
||||
{
|
||||
EmitVectorTernaryOpZx(context, () =>
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
context.Emit(OpCodes.Mul);
|
||||
context.Emit(OpCodes.Add);
|
||||
});
|
||||
EmitSse41Mul_AddSub(context, nameof(Sse2.Add));
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorTernaryOpZx(context, () =>
|
||||
{
|
||||
context.Emit(OpCodes.Mul);
|
||||
context.Emit(OpCodes.Add);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public static void Mla_Ve(ILEmitterCtx context)
|
||||
|
@ -1790,11 +1810,18 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
public static void Mls_V(ILEmitterCtx context)
|
||||
{
|
||||
EmitVectorTernaryOpZx(context, () =>
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
context.Emit(OpCodes.Mul);
|
||||
context.Emit(OpCodes.Sub);
|
||||
});
|
||||
EmitSse41Mul_AddSub(context, nameof(Sse2.Subtract));
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorTernaryOpZx(context, () =>
|
||||
{
|
||||
context.Emit(OpCodes.Mul);
|
||||
context.Emit(OpCodes.Sub);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public static void Mls_Ve(ILEmitterCtx context)
|
||||
|
@ -1808,7 +1835,14 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
public static void Mul_V(ILEmitterCtx context)
|
||||
{
|
||||
EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitSse41Mul_AddSub(context);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
|
||||
}
|
||||
}
|
||||
|
||||
public static void Mul_Ve(ILEmitterCtx context)
|
||||
|
@ -1923,19 +1957,23 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -1969,13 +2007,14 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -1999,25 +2038,19 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp2();
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
|
||||
|
||||
context.EmitLdc_I4(1);
|
||||
context.Emit(OpCodes.Ldc_I4_1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -2185,20 +2218,24 @@ namespace ChocolArm64.Instructions
|
|||
? nameof(Sse41.ConvertToVector128Int16)
|
||||
: nameof(Sse41.ConvertToVector128Int32);
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
|
@ -2244,20 +2281,24 @@ namespace ChocolArm64.Instructions
|
|||
? nameof(Sse41.ConvertToVector128Int16)
|
||||
: nameof(Sse41.ConvertToVector128Int32);
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
|
@ -2441,19 +2482,23 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -2482,13 +2527,14 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -2594,19 +2640,23 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -2659,13 +2709,14 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -2689,25 +2740,19 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp2();
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
|
||||
|
||||
context.EmitLdc_I4(1);
|
||||
context.Emit(OpCodes.Ldc_I4_1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -2737,8 +2782,7 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub));
|
||||
|
@ -2862,20 +2906,24 @@ namespace ChocolArm64.Instructions
|
|||
? nameof(Sse41.ConvertToVector128Int16)
|
||||
: nameof(Sse41.ConvertToVector128Int32);
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
|
@ -2921,20 +2969,24 @@ namespace ChocolArm64.Instructions
|
|||
? nameof(Sse41.ConvertToVector128Int16)
|
||||
: nameof(Sse41.ConvertToVector128Int32);
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
|
@ -3063,19 +3115,23 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -3104,13 +3160,14 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -3253,5 +3310,77 @@ namespace ChocolArm64.Instructions
|
|||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
}
|
||||
|
||||
private static void EmitSse41Mul_AddSub(ILEmitterCtx context, string nameAddSub = null)
|
||||
{
|
||||
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
||||
|
||||
if (nameAddSub != null)
|
||||
{
|
||||
context.EmitLdvec(op.Rd);
|
||||
}
|
||||
|
||||
if (op.Size == 0)
|
||||
{
|
||||
Type[] typesBle = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
|
||||
Type[] typesMul = new Type[] { typeof(Vector128<short>), typeof(Vector128<short>) };
|
||||
Type[] typesShs = new Type[] { typeof(Vector128<short>), typeof(byte) };
|
||||
Type[] typesSav = new Type[] { typeof(int) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
|
||||
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
|
||||
|
||||
context.EmitLdc_I4(0x00FF00FF);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.BlendVariable), typesBle));
|
||||
}
|
||||
else if (op.Size == 1)
|
||||
{
|
||||
Type[] typesMul = new Type[] { typeof(Vector128<short>), typeof(Vector128<short>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
|
||||
}
|
||||
else /* if (op.Size == 2) */
|
||||
{
|
||||
Type[] typesMul = new Type[] { typeof(Vector128<int>), typeof(Vector128<int>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.MultiplyLow), typesMul));
|
||||
}
|
||||
|
||||
if (nameAddSub != null)
|
||||
{
|
||||
Type[] typesAddSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameAddSub, typesAddSub));
|
||||
}
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue