Add Sse Opt. for S/Umax_V, S/Umin_V, S/Uaddw_V, S/Usubw_V, Fabs_S/V, Fneg_S/V Inst.; for Fcvtl_V, Fcvtn_V Inst.; and for Fcmp_S Inst.. Add/Improve other Sse Opt.. Add Tests. (#496)
* Update CpuTest.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update InstEmitSimdCmp.cs * Update SoftFloat.cs * Update InstEmitAluHelper.cs * Update InstEmitSimdArithmetic.cs * Update InstEmitSimdHelper.cs * Update VectorHelper.cs * Update InstEmitSimdCvt.cs * Update InstEmitSimdArithmetic.cs * Update CpuTestSimd.cs * Update InstEmitSimdArithmetic.cs * Update OpCodeTable.cs * Update InstEmitSimdArithmetic.cs * Update InstEmitSimdCmp.cs * Update InstEmitSimdCvt.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Create CpuTestSimdFcond.cs * Update OpCodeTable.cs * Update InstEmitSimdMove.cs * Update CpuTestSimdIns.cs * Create CpuTestSimdExt.cs * Nit. * Update PackageReference.
This commit is contained in:
parent
b7613dd4b8
commit
e603b7afbc
16 changed files with 2049 additions and 337 deletions
|
@ -3,6 +3,7 @@ using ChocolArm64.State;
|
|||
using ChocolArm64.Translation;
|
||||
using System;
|
||||
using System.Reflection.Emit;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
|
||||
using static ChocolArm64.Instructions.InstEmitSimdHelper;
|
||||
|
@ -17,6 +18,8 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size] };
|
||||
|
||||
context.EmitLdintzr(op.Rn);
|
||||
|
||||
switch (op.Size)
|
||||
|
@ -26,16 +29,9 @@ namespace ChocolArm64.Instructions
|
|||
case 2: context.Emit(OpCodes.Conv_U4); break;
|
||||
}
|
||||
|
||||
Type[] types = new Type[] { UIntTypesPerSizeLog2[op.Size] };
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), types));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
||||
|
||||
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -48,11 +44,11 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
}
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -69,14 +65,34 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;
|
||||
|
||||
int bytes = op.GetBitsCount() >> 3;
|
||||
int elems = bytes >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size] };
|
||||
|
||||
EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
switch (op.Size)
|
||||
{
|
||||
case 0: context.Emit(OpCodes.Conv_U1); break;
|
||||
case 1: context.Emit(OpCodes.Conv_U2); break;
|
||||
case 2: context.Emit(OpCodes.Conv_U4); break;
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
||||
|
||||
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
|
||||
}
|
||||
else
|
||||
{
|
||||
int bytes = op.GetBitsCount() >> 3;
|
||||
int elems = bytes >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
}
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
|
@ -89,32 +105,65 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp;
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitStvectmp();
|
||||
|
||||
int bytes = op.GetBitsCount() >> 3;
|
||||
|
||||
int position = op.Imm4;
|
||||
|
||||
for (int index = 0; index < bytes; index++)
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;
|
||||
Type[] typesShs = new Type[] { typeof(Vector128<byte>), typeof(byte) };
|
||||
Type[] typesOr = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
|
||||
|
||||
if (position == bytes)
|
||||
EmitLdvecWithUnsignedCast(context, op.Rn, 0);
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
position = 0;
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
|
||||
}
|
||||
|
||||
EmitVectorExtractZx(context, reg, position++, 0);
|
||||
EmitVectorInsertTmp(context, index, 0);
|
||||
context.EmitLdc_I4(op.Imm4);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesShs));
|
||||
|
||||
EmitLdvecWithUnsignedCast(context, op.Rm, 0);
|
||||
|
||||
context.EmitLdc_I4((op.RegisterSize == RegisterSize.Simd64 ? 8 : 16) - op.Imm4);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), typesShs));
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
|
||||
|
||||
EmitStvecWithUnsignedCast(context, op.Rd, 0);
|
||||
}
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
else
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
int bytes = op.GetBitsCount() >> 3;
|
||||
|
||||
int position = op.Imm4;
|
||||
|
||||
for (int index = 0; index < bytes; index++)
|
||||
{
|
||||
int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;
|
||||
|
||||
if (position == bytes)
|
||||
{
|
||||
position = 0;
|
||||
}
|
||||
|
||||
EmitVectorExtractZx(context, reg, position++, 0);
|
||||
EmitVectorInsertTmp(context, index, 0);
|
||||
}
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue