Implement Fast Paths for most A32 SIMD instructions (#952)

* Begin work on A32 SIMD Intrinsics

* More instructions, some cleanup.

* Intrinsics for Move instructions (zip etc)

These pass the existing tests.

* Intrinsics for some of Cvt

While doing this I noticed that the conversion for int/fp was incorrect
in the slow path. I'll fix this in the original repo.

* Intrinsics for more Arithmetic instructions.

* Intrinsics for Vext

* Fix VEXT Intrinsic for double words.

* Use InsertPs to move scalar values.

* Cleanup, fix VPADD.f32 and VMIN signed integer.

* Cleanup, add SSE2 support for scalar insert.

Works similarly to the IR scalar insert, but obviously this one works
directly on V128.

* Minor cleanup.

* Enable intrinsic for FP64 to integer conversion.

* Address feedback apart from splitting out intrinsic float abs

Also: treat bad VREV encodings as undefined rather than throwing in translation.

* Move float abs to helper, fix bug with cvt

* Rename opc2 & 3 to match A32 docs, use ArgumentOutOfRangeException appropriately.

* Get the name of the variable at compile time rather than from a string literal.

* Use correct double sign mask.
This commit is contained in:
jduncanator 2020-03-05 11:41:33 +11:00 committed by GitHub
parent d9ed827696
commit 68e15c1a74
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 2077 additions and 400 deletions

View file

@ -186,9 +186,7 @@ namespace ARMeilleure.Instructions
{
Operand res = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm));
Operand mask = X86GetScalar(context, -0f);
res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
res = EmitFloatAbs(context, res, true, false);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
}
@ -196,9 +194,7 @@ namespace ARMeilleure.Instructions
{
Operand res = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm));
Operand mask = X86GetScalar(context, -0d);
res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
res = EmitFloatAbs(context, res, false, false);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
}
@ -226,9 +222,7 @@ namespace ARMeilleure.Instructions
{
Operand res = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm));
Operand mask = X86GetAllElements(context, -0f);
res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
res = EmitFloatAbs(context, res, true, true);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -241,9 +235,7 @@ namespace ARMeilleure.Instructions
{
Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm));
Operand mask = X86GetAllElements(context, -0d);
res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
res = EmitFloatAbs(context, res, false, true);
context.Copy(GetVec(op.Rd), res);
}
@ -267,17 +259,13 @@ namespace ARMeilleure.Instructions
if (op.Size == 0)
{
Operand mask = X86GetScalar(context, -0f);
Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn));
Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, false);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
}
else /* if (op.Size == 1) */
{
Operand mask = X86GetScalar(context, -0d);
Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn));
Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, false);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
}
@ -299,11 +287,9 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
if (sizeF == 0)
if (sizeF == 0)
{
Operand mask = X86GetAllElements(context, -0f);
Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn));
Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, true);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -314,9 +300,7 @@ namespace ARMeilleure.Instructions
}
else /* if (sizeF == 1) */
{
Operand mask = X86GetAllElements(context, -0d);
Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn));
Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, true);
context.Copy(GetVec(op.Rd), res);
}
@ -3121,7 +3105,7 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), res);
}
private static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
public static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
{
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;