Add a new JIT compiler for CPU code (#693)

* Start of the ARMeilleure project

* Refactoring around the old IRAdapter, now renamed to PreAllocator

* Optimize the LowestBitSet method

* Add CLZ support and fix CLS implementation (see the CLS-from-CLZ sketch after this list)

* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks

* Implement the ByteSwap IR instruction, and some refactoring on the assembler

* Implement the DivideUI IR instruction and fix 64-bit IDIV

* Correct constant operand type on CSINC

* Move division instructions implementation to InstEmitDiv

* Fix destination type for the ConditionalSelect IR instruction

* Implement UMULH and SMULH, with new IR instructions

* Fix some issues with shift instructions

* Fix constant types for BFM instructions

* Fix up new tests using the new V128 struct

* Update tests

* Move DIV tests to a separate file

* Add support for calls, and some instructions that depend on them

* Start adding support for SIMD & FP types, along with some of the related ARM instructions

* Fix some typos and the divide instruction with FP operands

* Fix wrong method call on Clz_V

* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes

* Implement SIMD logical instructions and more misc. fixes

* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations

* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes

* Implement SIMD shift instruction and fix Dup_V

* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table

* Fix check with tolerance on tester

* Implement FP & SIMD comparison instructions, and some fixes

* Update FCVT (Scalar) encoding on the table to support the Half-float variants

* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes

* Use old memory access methods, made a start on SIMD memory instruction support, some fixes

* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes

* Fix arguments count with struct return values, other fixes

* More instructions

* Misc. fixes and integrate LDj3SNuD fixes

* Update tests

* Add a faster linear scan allocator, unwinding support on windows, and other changes

* Update Ryujinx.HLE

* Update Ryujinx.Graphics

* Fix V128 return pointer passing, RCX is clobbered

* Update Ryujinx.Tests

* Update ITimeZoneService

* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks

* Use generic GetFunctionPointerForDelegate method and other tweaks

* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics

* Remove some unused code on the assembler

* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler

* Add hardware capability detection

* Fix regression on Sha1h and revert Fcm** changes

* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator

* Fix silly mistake introduced on last commit on CpuId

* Generate inline stack probes when the stack allocation is too large

* Initial support for the System-V ABI

* Support multiple destination operands

* Fix SSE2 VectorInsert8 path, and other fixes

* Change placement of XMM callee save and restore code to match other compilers

* Rename Dest to Destination and Inst to Instruction

* Fix a regression related to calls and the V128 type

* Add an extra space on comments to match code style

* Some refactoring

* Fix vector insert FP32 SSE2 path

* Port over the ARM32 instructions

* Avoid memory protection races on JIT Cache

* Another fix on VectorInsert FP32 (thanks to LDj3SNuD)

* Float operands don't need to use the same register when VEX is supported

* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks

* Some nits and small improvements on the pre-allocator

* CpuThreadState is gone

* Allow changing CPU emulators with a config entry

* Add runtime identifiers on the ARMeilleure project

* Allow switching between CPUs through a config entry (pt. 2)

* Change win10-x64 to win-x64 on projects

* Update the Ryujinx project to use ARMeilleure

* Ensure that the selected register is valid on the hybrid allocator

* Allow exiting on returns to 0 (should fix test regression)

* Remove register assignments for most used variables on the hybrid allocator

* Do not use fixed registers as spill temp

* Add missing namespace and remove unneeded using

* Address PR feedback

* Fix types, etc

* Enable AssumeStrictAbiCompliance by default

* Ensure that Spill and Fill don't load or store any more than necessary
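
For the "Add CLZ support and fix CLS implementation" item above, the relationship between the two operations is worth spelling out, since mixing them up is the usual source of CLS bugs. The sketch below is illustrative only and is not code from ARMeilleure: it shows the standard identity CLS(x) = CLZ(x ^ (x >> 1)) - 1 (arithmetic shift), with a deliberately simple software CLZ so the snippet is self-contained.

```csharp
// Illustrative sketch only (not ARMeilleure code): deriving CLS from CLZ.
static class BitCountSketch
{
    // Count leading zeros of a 64-bit value; a plain loop keeps the sketch
    // self-contained (a real JIT would emit LZCNT/BSR or the ARM CLZ instruction).
    public static int CountLeadingZeros(ulong value)
    {
        if (value == 0)
        {
            return 64;
        }

        int count = 0;

        while ((value & (1UL << 63)) == 0)
        {
            value <<= 1;
            count++;
        }

        return count;
    }

    // CLS counts how many bits below the sign bit are equal to the sign bit.
    // x ^ (x >> 1) (arithmetic shift) zeroes every leading bit that matches
    // its neighbour, so the result has CLS(x) + 1 leading zeros.
    public static int CountLeadingSignBits(long value)
    {
        return CountLeadingZeros((ulong)(value ^ (value >> 1))) - 1;
    }
}
```

For example, CountLeadingSignBits(0) and CountLeadingSignBits(-1) both return 63, matching the AArch64 CLS definition for 64-bit operands.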
gdkchan 2019-08-08 15:56:22 -03:00 committed by emmauss
parent 1ba58e9942
commit a731ab3a2a
310 changed files with 37389 additions and 2086 deletions


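The diff below is from the Ryujinx.Tests CPU test harness (CpuTest.cs), which moves from ChocolArm64's CpuThread/CpuThreadState to ARMeilleure's Translator and ExecutionContext, and from Vector128<float> helpers to the new V128 type. As a hypothetical illustration of the migrated API (this test, its opcode, and its register values are not part of the commit), a class derived from CpuTest would drive the new harness roughly like this:

```csharp
// Hypothetical usage sketch, not code from the commit: exercises the new
// ExecutionContext-based harness set up in the diff below.
using ARMeilleure.State;
using NUnit.Framework;

namespace Ryujinx.Tests.Cpu
{
    public class CpuTestExample : CpuTest
    {
        [Test]
        public void Add_Register_64Bit()
        {
            // 0x8B020020 encodes ADD X0, X1, X2 (64-bit, shifted register).
            ExecutionContext context = SingleOpcode(0x8B020020, x1: 2, x2: 3);

            // Results are read back through the context instead of CpuThreadState.
            Assert.That(context.GetX(0), Is.EqualTo(5UL));
        }
    }
}
```

The pattern matches what the hunks below set up: Opcode() writes instructions at _currAddress, SetContext() seeds registers and flags, ExecuteOpcodes() runs both the JIT and (when available) Unicorn, and the assertion helpers compare the two.
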
@@ -1,7 +1,6 @@
-using ChocolArm64;
-using ChocolArm64.Memory;
-using ChocolArm64.State;
-using ChocolArm64.Translation;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
 using NUnit.Framework;
@@ -9,24 +8,24 @@ using Ryujinx.Tests.Unicorn;
 using System;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
 using System.Threading;
 namespace Ryujinx.Tests.Cpu
 {
     [TestFixture]
     public class CpuTest
     {
-        protected long Position { get; private set; }
-        private long _size;
+        private ulong _currAddress;
+        private long _size;
-        private long _entryPoint;
+        private ulong _entryPoint;
         private IntPtr _ramPointer;
         private MemoryManager _memory;
-        private CpuThread _thread;
+        private ExecutionContext _context;
+        private Translator _translator;
         private static bool _unicornAvailable;
         private UnicornAArch64 _unicornEmu;
@@ -44,24 +43,24 @@ namespace Ryujinx.Tests.Cpu
         [SetUp]
         public void Setup()
         {
-            Position = 0x1000;
-            _size = 0x1000;
+            _currAddress = 0x1000;
+            _size = 0x1000;
-            _entryPoint = Position;
+            _entryPoint = _currAddress;
             _ramPointer = Marshal.AllocHGlobal(new IntPtr(_size));
             _memory = new MemoryManager(_ramPointer);
-            _memory.Map(Position, 0, _size);
+            _memory.Map((long)_currAddress, 0, _size);
-            Translator translator = new Translator(_memory);
+            _context = new ExecutionContext();
-            _thread = new CpuThread(translator, _memory, _entryPoint);
+            _translator = new Translator(_memory);
             if (_unicornAvailable)
             {
                 _unicornEmu = new UnicornAArch64();
-                _unicornEmu.MemoryMap((ulong)Position, (ulong)_size, MemoryPermission.READ | MemoryPermission.EXEC);
-                _unicornEmu.PC = (ulong)_entryPoint;
+                _unicornEmu.MemoryMap(_currAddress, (ulong)_size, MemoryPermission.READ | MemoryPermission.EXEC);
+                _unicornEmu.PC = _entryPoint;
             }
         }
@@ -70,7 +69,8 @@ namespace Ryujinx.Tests.Cpu
         {
             Marshal.FreeHGlobal(_ramPointer);
             _memory = null;
-            _thread = null;
+            _context = null;
+            _translator = null;
             _unicornEmu = null;
         }
@@ -82,51 +82,61 @@ namespace Ryujinx.Tests.Cpu
         protected void Opcode(uint opcode)
         {
-            _thread.Memory.WriteUInt32(Position, opcode);
+            _memory.WriteUInt32((long)_currAddress, opcode);
             if (_unicornAvailable)
             {
-                _unicornEmu.MemoryWrite32((ulong)Position, opcode);
+                _unicornEmu.MemoryWrite32((ulong)_currAddress, opcode);
             }
-            Position += 4;
+            _currAddress += 4;
         }
-        protected void SetThreadState(ulong x0 = 0, ulong x1 = 0, ulong x2 = 0, ulong x3 = 0, ulong x31 = 0,
-            Vector128<float> v0 = default(Vector128<float>),
-            Vector128<float> v1 = default(Vector128<float>),
-            Vector128<float> v2 = default(Vector128<float>),
-            Vector128<float> v3 = default(Vector128<float>),
-            Vector128<float> v4 = default(Vector128<float>),
-            Vector128<float> v5 = default(Vector128<float>),
-            Vector128<float> v30 = default(Vector128<float>),
-            Vector128<float> v31 = default(Vector128<float>),
-            bool overflow = false, bool carry = false, bool zero = false, bool negative = false,
-            int fpcr = 0x0, int fpsr = 0x0)
+        protected ExecutionContext GetContext() => _context;
+        protected void SetContext(ulong x0 = 0,
+            ulong x1 = 0,
+            ulong x2 = 0,
+            ulong x3 = 0,
+            ulong x31 = 0,
+            V128 v0 = default(V128),
+            V128 v1 = default(V128),
+            V128 v2 = default(V128),
+            V128 v3 = default(V128),
+            V128 v4 = default(V128),
+            V128 v5 = default(V128),
+            V128 v30 = default(V128),
+            V128 v31 = default(V128),
+            bool overflow = false,
+            bool carry = false,
+            bool zero = false,
+            bool negative = false,
+            int fpcr = 0,
+            int fpsr = 0)
         {
-            _thread.ThreadState.X0 = x0;
-            _thread.ThreadState.X1 = x1;
-            _thread.ThreadState.X2 = x2;
-            _thread.ThreadState.X3 = x3;
+            _context.SetX(0, x0);
+            _context.SetX(1, x1);
+            _context.SetX(2, x2);
+            _context.SetX(3, x3);
-            _thread.ThreadState.X31 = x31;
+            _context.SetX(31, x31);
-            _thread.ThreadState.V0 = v0;
-            _thread.ThreadState.V1 = v1;
-            _thread.ThreadState.V2 = v2;
-            _thread.ThreadState.V3 = v3;
-            _thread.ThreadState.V4 = v4;
-            _thread.ThreadState.V5 = v5;
-            _thread.ThreadState.V30 = v30;
-            _thread.ThreadState.V31 = v31;
+            _context.SetV(0, v0);
+            _context.SetV(1, v1);
+            _context.SetV(2, v2);
+            _context.SetV(3, v3);
+            _context.SetV(4, v4);
+            _context.SetV(5, v5);
+            _context.SetV(30, v30);
+            _context.SetV(31, v31);
-            _thread.ThreadState.Overflow = overflow;
-            _thread.ThreadState.Carry = carry;
-            _thread.ThreadState.Zero = zero;
-            _thread.ThreadState.Negative = negative;
+            _context.SetPstateFlag(PState.VFlag, overflow);
+            _context.SetPstateFlag(PState.CFlag, carry);
+            _context.SetPstateFlag(PState.ZFlag, zero);
+            _context.SetPstateFlag(PState.NFlag, negative);
-            _thread.ThreadState.Fpcr = fpcr;
-            _thread.ThreadState.Fpsr = fpsr;
+            _context.Fpcr = (FPCR)fpcr;
+            _context.Fpsr = (FPSR)fpsr;
             if (_unicornAvailable)
             {
@@ -137,14 +147,14 @@ namespace Ryujinx.Tests.Cpu
                 _unicornEmu.SP = x31;
-                _unicornEmu.Q[0] = v0;
-                _unicornEmu.Q[1] = v1;
-                _unicornEmu.Q[2] = v2;
-                _unicornEmu.Q[3] = v3;
-                _unicornEmu.Q[4] = v4;
-                _unicornEmu.Q[5] = v5;
-                _unicornEmu.Q[30] = v30;
-                _unicornEmu.Q[31] = v31;
+                _unicornEmu.Q[0] = V128ToSimdValue(v0);
+                _unicornEmu.Q[1] = V128ToSimdValue(v1);
+                _unicornEmu.Q[2] = V128ToSimdValue(v2);
+                _unicornEmu.Q[3] = V128ToSimdValue(v3);
+                _unicornEmu.Q[4] = V128ToSimdValue(v4);
+                _unicornEmu.Q[5] = V128ToSimdValue(v5);
+                _unicornEmu.Q[30] = V128ToSimdValue(v30);
+                _unicornEmu.Q[31] = V128ToSimdValue(v31);
                 _unicornEmu.OverflowFlag = overflow;
                 _unicornEmu.CarryFlag = carry;
@@ -158,43 +168,41 @@ namespace Ryujinx.Tests.Cpu
         protected void ExecuteOpcodes()
         {
-            using (ManualResetEvent wait = new ManualResetEvent(false))
-            {
-                _thread.ThreadState.Break += (sender, e) => _thread.StopExecution();
-                _thread.WorkFinished += (sender, e) => wait.Set();
-                _thread.Execute();
-                wait.WaitOne();
-            }
+            _translator.Execute(_context, _entryPoint);
             if (_unicornAvailable)
             {
-                _unicornEmu.RunForCount((ulong)(Position - _entryPoint - 8) / 4);
+                _unicornEmu.RunForCount((ulong)(_currAddress - _entryPoint - 4) / 4);
             }
         }
-        protected CpuThreadState GetThreadState() => _thread.ThreadState;
-        protected CpuThreadState SingleOpcode(uint opcode,
-            ulong x0 = 0, ulong x1 = 0, ulong x2 = 0, ulong x3 = 0, ulong x31 = 0,
-            Vector128<float> v0 = default(Vector128<float>),
-            Vector128<float> v1 = default(Vector128<float>),
-            Vector128<float> v2 = default(Vector128<float>),
-            Vector128<float> v3 = default(Vector128<float>),
-            Vector128<float> v4 = default(Vector128<float>),
-            Vector128<float> v5 = default(Vector128<float>),
-            Vector128<float> v30 = default(Vector128<float>),
-            Vector128<float> v31 = default(Vector128<float>),
-            bool overflow = false, bool carry = false, bool zero = false, bool negative = false,
-            int fpcr = 0x0, int fpsr = 0x0)
+        protected ExecutionContext SingleOpcode(uint opcode,
+            ulong x0 = 0,
+            ulong x1 = 0,
+            ulong x2 = 0,
+            ulong x3 = 0,
+            ulong x31 = 0,
+            V128 v0 = default(V128),
+            V128 v1 = default(V128),
+            V128 v2 = default(V128),
+            V128 v3 = default(V128),
+            V128 v4 = default(V128),
+            V128 v5 = default(V128),
+            V128 v30 = default(V128),
+            V128 v31 = default(V128),
+            bool overflow = false,
+            bool carry = false,
+            bool zero = false,
+            bool negative = false,
+            int fpcr = 0,
+            int fpsr = 0)
         {
             Opcode(opcode);
-            Opcode(0xD4200000); // BRK #0
             Opcode(0xD65F03C0); // RET
-            SetThreadState(x0, x1, x2, x3, x31, v0, v1, v2, v3, v4, v5, v30, v31, overflow, carry, zero, negative, fpcr, fpsr);
+            SetContext(x0, x1, x2, x3, x31, v0, v1, v2, v3, v4, v5, v30, v31, overflow, carry, zero, negative, fpcr, fpsr);
             ExecuteOpcodes();
-            return GetThreadState();
+            return GetContext();
         }
         /// <summary>Rounding Mode control field.</summary>
@@ -279,101 +287,101 @@ namespace Ryujinx.Tests.Cpu
                 ManageFpSkips(fpSkips);
             }
-            Assert.That(_thread.ThreadState.X0, Is.EqualTo(_unicornEmu.X[0]));
-            Assert.That(_thread.ThreadState.X1, Is.EqualTo(_unicornEmu.X[1]));
-            Assert.That(_thread.ThreadState.X2, Is.EqualTo(_unicornEmu.X[2]));
-            Assert.That(_thread.ThreadState.X3, Is.EqualTo(_unicornEmu.X[3]));
-            Assert.That(_thread.ThreadState.X4, Is.EqualTo(_unicornEmu.X[4]));
-            Assert.That(_thread.ThreadState.X5, Is.EqualTo(_unicornEmu.X[5]));
-            Assert.That(_thread.ThreadState.X6, Is.EqualTo(_unicornEmu.X[6]));
-            Assert.That(_thread.ThreadState.X7, Is.EqualTo(_unicornEmu.X[7]));
-            Assert.That(_thread.ThreadState.X8, Is.EqualTo(_unicornEmu.X[8]));
-            Assert.That(_thread.ThreadState.X9, Is.EqualTo(_unicornEmu.X[9]));
-            Assert.That(_thread.ThreadState.X10, Is.EqualTo(_unicornEmu.X[10]));
-            Assert.That(_thread.ThreadState.X11, Is.EqualTo(_unicornEmu.X[11]));
-            Assert.That(_thread.ThreadState.X12, Is.EqualTo(_unicornEmu.X[12]));
-            Assert.That(_thread.ThreadState.X13, Is.EqualTo(_unicornEmu.X[13]));
-            Assert.That(_thread.ThreadState.X14, Is.EqualTo(_unicornEmu.X[14]));
-            Assert.That(_thread.ThreadState.X15, Is.EqualTo(_unicornEmu.X[15]));
-            Assert.That(_thread.ThreadState.X16, Is.EqualTo(_unicornEmu.X[16]));
-            Assert.That(_thread.ThreadState.X17, Is.EqualTo(_unicornEmu.X[17]));
-            Assert.That(_thread.ThreadState.X18, Is.EqualTo(_unicornEmu.X[18]));
-            Assert.That(_thread.ThreadState.X19, Is.EqualTo(_unicornEmu.X[19]));
-            Assert.That(_thread.ThreadState.X20, Is.EqualTo(_unicornEmu.X[20]));
-            Assert.That(_thread.ThreadState.X21, Is.EqualTo(_unicornEmu.X[21]));
-            Assert.That(_thread.ThreadState.X22, Is.EqualTo(_unicornEmu.X[22]));
-            Assert.That(_thread.ThreadState.X23, Is.EqualTo(_unicornEmu.X[23]));
-            Assert.That(_thread.ThreadState.X24, Is.EqualTo(_unicornEmu.X[24]));
-            Assert.That(_thread.ThreadState.X25, Is.EqualTo(_unicornEmu.X[25]));
-            Assert.That(_thread.ThreadState.X26, Is.EqualTo(_unicornEmu.X[26]));
-            Assert.That(_thread.ThreadState.X27, Is.EqualTo(_unicornEmu.X[27]));
-            Assert.That(_thread.ThreadState.X28, Is.EqualTo(_unicornEmu.X[28]));
-            Assert.That(_thread.ThreadState.X29, Is.EqualTo(_unicornEmu.X[29]));
-            Assert.That(_thread.ThreadState.X30, Is.EqualTo(_unicornEmu.X[30]));
+            Assert.That(_context.GetX(0), Is.EqualTo(_unicornEmu.X[0]));
+            Assert.That(_context.GetX(1), Is.EqualTo(_unicornEmu.X[1]));
+            Assert.That(_context.GetX(2), Is.EqualTo(_unicornEmu.X[2]));
+            Assert.That(_context.GetX(3), Is.EqualTo(_unicornEmu.X[3]));
+            Assert.That(_context.GetX(4), Is.EqualTo(_unicornEmu.X[4]));
+            Assert.That(_context.GetX(5), Is.EqualTo(_unicornEmu.X[5]));
+            Assert.That(_context.GetX(6), Is.EqualTo(_unicornEmu.X[6]));
+            Assert.That(_context.GetX(7), Is.EqualTo(_unicornEmu.X[7]));
+            Assert.That(_context.GetX(8), Is.EqualTo(_unicornEmu.X[8]));
+            Assert.That(_context.GetX(9), Is.EqualTo(_unicornEmu.X[9]));
+            Assert.That(_context.GetX(10), Is.EqualTo(_unicornEmu.X[10]));
+            Assert.That(_context.GetX(11), Is.EqualTo(_unicornEmu.X[11]));
+            Assert.That(_context.GetX(12), Is.EqualTo(_unicornEmu.X[12]));
+            Assert.That(_context.GetX(13), Is.EqualTo(_unicornEmu.X[13]));
+            Assert.That(_context.GetX(14), Is.EqualTo(_unicornEmu.X[14]));
+            Assert.That(_context.GetX(15), Is.EqualTo(_unicornEmu.X[15]));
+            Assert.That(_context.GetX(16), Is.EqualTo(_unicornEmu.X[16]));
+            Assert.That(_context.GetX(17), Is.EqualTo(_unicornEmu.X[17]));
+            Assert.That(_context.GetX(18), Is.EqualTo(_unicornEmu.X[18]));
+            Assert.That(_context.GetX(19), Is.EqualTo(_unicornEmu.X[19]));
+            Assert.That(_context.GetX(20), Is.EqualTo(_unicornEmu.X[20]));
+            Assert.That(_context.GetX(21), Is.EqualTo(_unicornEmu.X[21]));
+            Assert.That(_context.GetX(22), Is.EqualTo(_unicornEmu.X[22]));
+            Assert.That(_context.GetX(23), Is.EqualTo(_unicornEmu.X[23]));
+            Assert.That(_context.GetX(24), Is.EqualTo(_unicornEmu.X[24]));
+            Assert.That(_context.GetX(25), Is.EqualTo(_unicornEmu.X[25]));
+            Assert.That(_context.GetX(26), Is.EqualTo(_unicornEmu.X[26]));
+            Assert.That(_context.GetX(27), Is.EqualTo(_unicornEmu.X[27]));
+            Assert.That(_context.GetX(28), Is.EqualTo(_unicornEmu.X[28]));
+            Assert.That(_context.GetX(29), Is.EqualTo(_unicornEmu.X[29]));
+            Assert.That(_context.GetX(30), Is.EqualTo(_unicornEmu.X[30]));
-            Assert.That(_thread.ThreadState.X31, Is.EqualTo(_unicornEmu.SP));
+            Assert.That(_context.GetX(31), Is.EqualTo(_unicornEmu.SP));
             if (fpTolerances == FpTolerances.None)
             {
-                Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0]));
+                Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0]));
             }
             else
             {
                 ManageFpTolerances(fpTolerances);
             }
-            Assert.That(_thread.ThreadState.V1, Is.EqualTo(_unicornEmu.Q[1]));
-            Assert.That(_thread.ThreadState.V2, Is.EqualTo(_unicornEmu.Q[2]));
-            Assert.That(_thread.ThreadState.V3, Is.EqualTo(_unicornEmu.Q[3]));
-            Assert.That(_thread.ThreadState.V4, Is.EqualTo(_unicornEmu.Q[4]));
-            Assert.That(_thread.ThreadState.V5, Is.EqualTo(_unicornEmu.Q[5]));
-            Assert.That(_thread.ThreadState.V6, Is.EqualTo(_unicornEmu.Q[6]));
-            Assert.That(_thread.ThreadState.V7, Is.EqualTo(_unicornEmu.Q[7]));
-            Assert.That(_thread.ThreadState.V8, Is.EqualTo(_unicornEmu.Q[8]));
-            Assert.That(_thread.ThreadState.V9, Is.EqualTo(_unicornEmu.Q[9]));
-            Assert.That(_thread.ThreadState.V10, Is.EqualTo(_unicornEmu.Q[10]));
-            Assert.That(_thread.ThreadState.V11, Is.EqualTo(_unicornEmu.Q[11]));
-            Assert.That(_thread.ThreadState.V12, Is.EqualTo(_unicornEmu.Q[12]));
-            Assert.That(_thread.ThreadState.V13, Is.EqualTo(_unicornEmu.Q[13]));
-            Assert.That(_thread.ThreadState.V14, Is.EqualTo(_unicornEmu.Q[14]));
-            Assert.That(_thread.ThreadState.V15, Is.EqualTo(_unicornEmu.Q[15]));
-            Assert.That(_thread.ThreadState.V16, Is.EqualTo(_unicornEmu.Q[16]));
-            Assert.That(_thread.ThreadState.V17, Is.EqualTo(_unicornEmu.Q[17]));
-            Assert.That(_thread.ThreadState.V18, Is.EqualTo(_unicornEmu.Q[18]));
-            Assert.That(_thread.ThreadState.V19, Is.EqualTo(_unicornEmu.Q[19]));
-            Assert.That(_thread.ThreadState.V20, Is.EqualTo(_unicornEmu.Q[20]));
-            Assert.That(_thread.ThreadState.V21, Is.EqualTo(_unicornEmu.Q[21]));
-            Assert.That(_thread.ThreadState.V22, Is.EqualTo(_unicornEmu.Q[22]));
-            Assert.That(_thread.ThreadState.V23, Is.EqualTo(_unicornEmu.Q[23]));
-            Assert.That(_thread.ThreadState.V24, Is.EqualTo(_unicornEmu.Q[24]));
-            Assert.That(_thread.ThreadState.V25, Is.EqualTo(_unicornEmu.Q[25]));
-            Assert.That(_thread.ThreadState.V26, Is.EqualTo(_unicornEmu.Q[26]));
-            Assert.That(_thread.ThreadState.V27, Is.EqualTo(_unicornEmu.Q[27]));
-            Assert.That(_thread.ThreadState.V28, Is.EqualTo(_unicornEmu.Q[28]));
-            Assert.That(_thread.ThreadState.V29, Is.EqualTo(_unicornEmu.Q[29]));
-            Assert.That(_thread.ThreadState.V30, Is.EqualTo(_unicornEmu.Q[30]));
-            Assert.That(_thread.ThreadState.V31, Is.EqualTo(_unicornEmu.Q[31]));
+            Assert.That(V128ToSimdValue(_context.GetV(1)), Is.EqualTo(_unicornEmu.Q[1]));
+            Assert.That(V128ToSimdValue(_context.GetV(2)), Is.EqualTo(_unicornEmu.Q[2]));
+            Assert.That(V128ToSimdValue(_context.GetV(3)), Is.EqualTo(_unicornEmu.Q[3]));
+            Assert.That(V128ToSimdValue(_context.GetV(4)), Is.EqualTo(_unicornEmu.Q[4]));
+            Assert.That(V128ToSimdValue(_context.GetV(5)), Is.EqualTo(_unicornEmu.Q[5]));
+            Assert.That(V128ToSimdValue(_context.GetV(6)), Is.EqualTo(_unicornEmu.Q[6]));
+            Assert.That(V128ToSimdValue(_context.GetV(7)), Is.EqualTo(_unicornEmu.Q[7]));
+            Assert.That(V128ToSimdValue(_context.GetV(8)), Is.EqualTo(_unicornEmu.Q[8]));
+            Assert.That(V128ToSimdValue(_context.GetV(9)), Is.EqualTo(_unicornEmu.Q[9]));
+            Assert.That(V128ToSimdValue(_context.GetV(10)), Is.EqualTo(_unicornEmu.Q[10]));
+            Assert.That(V128ToSimdValue(_context.GetV(11)), Is.EqualTo(_unicornEmu.Q[11]));
+            Assert.That(V128ToSimdValue(_context.GetV(12)), Is.EqualTo(_unicornEmu.Q[12]));
+            Assert.That(V128ToSimdValue(_context.GetV(13)), Is.EqualTo(_unicornEmu.Q[13]));
+            Assert.That(V128ToSimdValue(_context.GetV(14)), Is.EqualTo(_unicornEmu.Q[14]));
+            Assert.That(V128ToSimdValue(_context.GetV(15)), Is.EqualTo(_unicornEmu.Q[15]));
+            Assert.That(V128ToSimdValue(_context.GetV(16)), Is.EqualTo(_unicornEmu.Q[16]));
+            Assert.That(V128ToSimdValue(_context.GetV(17)), Is.EqualTo(_unicornEmu.Q[17]));
+            Assert.That(V128ToSimdValue(_context.GetV(18)), Is.EqualTo(_unicornEmu.Q[18]));
+            Assert.That(V128ToSimdValue(_context.GetV(19)), Is.EqualTo(_unicornEmu.Q[19]));
+            Assert.That(V128ToSimdValue(_context.GetV(20)), Is.EqualTo(_unicornEmu.Q[20]));
+            Assert.That(V128ToSimdValue(_context.GetV(21)), Is.EqualTo(_unicornEmu.Q[21]));
+            Assert.That(V128ToSimdValue(_context.GetV(22)), Is.EqualTo(_unicornEmu.Q[22]));
+            Assert.That(V128ToSimdValue(_context.GetV(23)), Is.EqualTo(_unicornEmu.Q[23]));
+            Assert.That(V128ToSimdValue(_context.GetV(24)), Is.EqualTo(_unicornEmu.Q[24]));
+            Assert.That(V128ToSimdValue(_context.GetV(25)), Is.EqualTo(_unicornEmu.Q[25]));
+            Assert.That(V128ToSimdValue(_context.GetV(26)), Is.EqualTo(_unicornEmu.Q[26]));
+            Assert.That(V128ToSimdValue(_context.GetV(27)), Is.EqualTo(_unicornEmu.Q[27]));
+            Assert.That(V128ToSimdValue(_context.GetV(28)), Is.EqualTo(_unicornEmu.Q[28]));
+            Assert.That(V128ToSimdValue(_context.GetV(29)), Is.EqualTo(_unicornEmu.Q[29]));
+            Assert.That(V128ToSimdValue(_context.GetV(30)), Is.EqualTo(_unicornEmu.Q[30]));
+            Assert.That(V128ToSimdValue(_context.GetV(31)), Is.EqualTo(_unicornEmu.Q[31]));
-            Assert.That(_thread.ThreadState.Fpcr, Is.EqualTo(_unicornEmu.Fpcr));
-            Assert.That(_thread.ThreadState.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask));
+            Assert.That((int)_context.Fpcr, Is.EqualTo(_unicornEmu.Fpcr));
+            Assert.That((int)_context.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask));
-            Assert.That(_thread.ThreadState.Overflow, Is.EqualTo(_unicornEmu.OverflowFlag));
-            Assert.That(_thread.ThreadState.Carry, Is.EqualTo(_unicornEmu.CarryFlag));
-            Assert.That(_thread.ThreadState.Zero, Is.EqualTo(_unicornEmu.ZeroFlag));
-            Assert.That(_thread.ThreadState.Negative, Is.EqualTo(_unicornEmu.NegativeFlag));
+            Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag));
+            Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag));
+            Assert.That(_context.GetPstateFlag(PState.ZFlag), Is.EqualTo(_unicornEmu.ZeroFlag));
+            Assert.That(_context.GetPstateFlag(PState.NFlag), Is.EqualTo(_unicornEmu.NegativeFlag));
         }
         private void ManageFpSkips(FpSkips fpSkips)
         {
             if (fpSkips.HasFlag(FpSkips.IfNaNS))
             {
-                if (float.IsNaN(VectorExtractSingle(_unicornEmu.Q[0], (byte)0)))
+                if (float.IsNaN(_unicornEmu.Q[0].AsFloat()))
                 {
                     Assert.Ignore("NaN test.");
                 }
             }
             else if (fpSkips.HasFlag(FpSkips.IfNaND))
             {
-                if (double.IsNaN(VectorExtractDouble(_unicornEmu.Q[0], (byte)0)))
+                if (double.IsNaN(_unicornEmu.Q[0].AsDouble()))
                 {
                     Assert.Ignore("NaN test.");
                 }
@@ -398,158 +406,68 @@ namespace Ryujinx.Tests.Cpu
         private void ManageFpTolerances(FpTolerances fpTolerances)
         {
-            if (!Is.EqualTo(_unicornEmu.Q[0]).ApplyTo(_thread.ThreadState.V0).IsSuccess)
+            bool IsNormalOrSubnormalS(float f) => float.IsNormal(f) || float.IsSubnormal(f);
+            bool IsNormalOrSubnormalD(double d) => double.IsNormal(d) || double.IsSubnormal(d);
+            if (!Is.EqualTo(_unicornEmu.Q[0]).ApplyTo(V128ToSimdValue(_context.GetV(0))).IsSuccess)
             {
                 if (fpTolerances == FpTolerances.UpToOneUlpsS)
                 {
-                    if (IsNormalOrSubnormalS(VectorExtractSingle(_unicornEmu.Q[0], (byte)0)) &&
-                        IsNormalOrSubnormalS(VectorExtractSingle(_thread.ThreadState.V0, (byte)0)))
+                    if (IsNormalOrSubnormalS(_unicornEmu.Q[0].AsFloat()) &&
+                        IsNormalOrSubnormalS(_context.GetV(0).AsFloat()))
                     {
-                        Assert.That (VectorExtractSingle(_thread.ThreadState.V0, (byte)0),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0], (byte)0)).Within(1).Ulps);
-                        Assert.That (VectorExtractSingle(_thread.ThreadState.V0, (byte)1),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0], (byte)1)).Within(1).Ulps);
-                        Assert.That (VectorExtractSingle(_thread.ThreadState.V0, (byte)2),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0], (byte)2)).Within(1).Ulps);
-                        Assert.That (VectorExtractSingle(_thread.ThreadState.V0, (byte)3),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0], (byte)3)).Within(1).Ulps);
+                        Assert.That (_context.GetV(0).GetFloat(0),
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(0)).Within(1).Ulps);
+                        Assert.That (_context.GetV(0).GetFloat(1),
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(1)).Within(1).Ulps);
+                        Assert.That (_context.GetV(0).GetFloat(2),
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(2)).Within(1).Ulps);
+                        Assert.That (_context.GetV(0).GetFloat(3),
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(3)).Within(1).Ulps);
                         Console.WriteLine(fpTolerances);
                     }
                     else
                     {
-                        Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0]));
+                        Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0]));
                     }
                 }
                 if (fpTolerances == FpTolerances.UpToOneUlpsD)
                 {
-                    if (IsNormalOrSubnormalD(VectorExtractDouble(_unicornEmu.Q[0], (byte)0)) &&
-                        IsNormalOrSubnormalD(VectorExtractDouble(_thread.ThreadState.V0, (byte)0)))
+                    if (IsNormalOrSubnormalD(_unicornEmu.Q[0].AsDouble()) &&
+                        IsNormalOrSubnormalD(_context.GetV(0).AsDouble()))
                     {
-                        Assert.That (VectorExtractDouble(_thread.ThreadState.V0, (byte)0),
-                            Is.EqualTo(VectorExtractDouble(_unicornEmu.Q[0], (byte)0)).Within(1).Ulps);
-                        Assert.That (VectorExtractDouble(_thread.ThreadState.V0, (byte)1),
-                            Is.EqualTo(VectorExtractDouble(_unicornEmu.Q[0], (byte)1)).Within(1).Ulps);
+                        Assert.That (_context.GetV(0).GetDouble(0),
+                            Is.EqualTo(_unicornEmu.Q[0].GetDouble(0)).Within(1).Ulps);
+                        Assert.That (_context.GetV(0).GetDouble(1),
+                            Is.EqualTo(_unicornEmu.Q[0].GetDouble(1)).Within(1).Ulps);
                         Console.WriteLine(fpTolerances);
                     }
                     else
                     {
-                        Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0]));
+                        Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0]));
                    }
                }
            }
-            bool IsNormalOrSubnormalS(float f) => float.IsNormal(f) || float.IsSubnormal(f);
-            bool IsNormalOrSubnormalD(double d) => double.IsNormal(d) || double.IsSubnormal(d);
         }
-        protected static Vector128<float> MakeVectorE0(double e0)
+        private static SimdValue V128ToSimdValue(V128 value)
         {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-            return Sse.StaticCast<long, float>(Sse2.SetVector128(0, BitConverter.DoubleToInt64Bits(e0)));
+            return new SimdValue(value.GetUInt64(0), value.GetUInt64(1));
         }
-        protected static Vector128<float> MakeVectorE0E1(double e0, double e1)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
+        protected static V128 MakeVectorScalar(float value) => new V128(value);
+        protected static V128 MakeVectorScalar(double value) => new V128(value);
-            return Sse.StaticCast<long, float>(
-                Sse2.SetVector128(BitConverter.DoubleToInt64Bits(e1), BitConverter.DoubleToInt64Bits(e0)));
-        }
+        protected static V128 MakeVectorE0(ulong e0) => new V128(e0, 0);
+        protected static V128 MakeVectorE1(ulong e1) => new V128(0, e1);
-        protected static Vector128<float> MakeVectorE1(double e1)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
+        protected static V128 MakeVectorE0E1(ulong e0, ulong e1) => new V128(e0, e1);
-            return Sse.StaticCast<long, float>(Sse2.SetVector128(BitConverter.DoubleToInt64Bits(e1), 0));
-        }
-        protected static float VectorExtractSingle(Vector128<float> vector, byte index)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-            int value = Sse41.Extract(Sse.StaticCast<float, int>(vector), index);
-            return BitConverter.Int32BitsToSingle(value);
-        }
-        protected static double VectorExtractDouble(Vector128<float> vector, byte index)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-            long value = Sse41.Extract(Sse.StaticCast<float, long>(vector), index);
-            return BitConverter.Int64BitsToDouble(value);
-        }
-        protected static Vector128<float> MakeVectorE0(ulong e0)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-            return Sse.StaticCast<ulong, float>(Sse2.SetVector128(0, e0));
-        }
-        protected static Vector128<float> MakeVectorE0E1(ulong e0, ulong e1)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-            return Sse.StaticCast<ulong, float>(Sse2.SetVector128(e1, e0));
-        }
-        protected static Vector128<float> MakeVectorE1(ulong e1)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-            return Sse.StaticCast<ulong, float>(Sse2.SetVector128(e1, 0));
-        }
-        protected static ulong GetVectorE0(Vector128<float> vector)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-            return Sse41.Extract(Sse.StaticCast<float, ulong>(vector), (byte)0);
-        }
-        protected static ulong GetVectorE1(Vector128<float> vector)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-            return Sse41.Extract(Sse.StaticCast<float, ulong>(vector), (byte)1);
-        }
+        protected static ulong GetVectorE0(V128 vector) => vector.GetUInt64(0);
+        protected static ulong GetVectorE1(V128 vector) => vector.GetUInt64(1);
         protected static ushort GenNormalH()
         {