Add multi-level function table (#2228)

* Add AddressTable<T>

* Use AddressTable<T> for dispatch

* Remove JumpTable & co.

* Add fallback for out of range addresses

* Add PPTC support

* Add documentation to `AddressTable<T>`

* Make AddressTable<T> configurable

* Fix table walk

* Fix IsMapped check

* Remove CountTableCapacity

* Add PPTC support for fast path

* Rename IsMapped to IsValid

* Remove stale comment

* Change format of address in exception message

* Add TranslatorStubs

* Split DispatchStub

Avoids recompilation of stubs during tests.

* Add hint for 64bit or 32bit

* Add documentation to `Symbol`

* Add documentation to `TranslatorStubs`

Make `TranslatorStubs` disposable as well.

* Add documentation to `SymbolType`

* Add `AddressTableEventSource` to monitor function table size

Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.

 dotnet-counters monitor -n Ryujinx --counters ARMeilleure

* Add `AllowLcqInFunctionTable` optimization toggle

This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.

* Implement unmanaged dispatcher

Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.

* Remove redundant null check

* Tune levels of FunctionTable

Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.

* Use 64-bit function table

Improves codegen for direct branches:

    mov qword [rax+0x408],0x10603560
 -  mov rcx,sub_10603560_OFFSET
 -  mov ecx,[rcx]
 -  mov ecx,ecx
 -  mov rdx,JIT_CACHE_BASE
 -  add rdx,rcx
 +  mov rcx,sub_10603560
 +  mov rdx,[rcx]
    mov rcx,rax

Improves codegen for dispatch stub:

    and rax,byte +0x1f
 -  mov eax,[rcx+rax*4]
 -  mov eax,eax
 -  mov rcx,JIT_CACHE_BASE
 -  lea rax,[rcx+rax]
 +  mov rax,[rcx+rax*8]
    mov rcx,rbx

* Remove `JitCacheSymbol` & `JitCache.Offset`

* Turn `Translator.Translate` into an instance method

We do not have to add more parameter to this method and related ones as
new structures are added & needed for translation.

* Add symbol only when PTC is enabled

Address LDj3SNuD's feedback

* Change `NativeContext.Running` to a 32-bit integer

* Fix PageTable symbol for host mapped
This commit is contained in:
FICTURE7 2021-05-30 01:06:28 +04:00 committed by GitHub
parent f3b0b4831c
commit 9d7627af64
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
36 changed files with 1020 additions and 1272 deletions

View file

@ -28,7 +28,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
private const uint InternalVersion = 2289; //! To be incremented manually for each change to the ARMeilleure project.
private const uint InternalVersion = 2228; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";
@ -36,10 +36,9 @@ namespace ARMeilleure.Translation.PTC
private const string TitleIdTextDefault = "0000000000000000";
private const string DisplayVersionDefault = "0";
internal const int PageTablePointerIndex = -1; // Must be a negative value.
internal const int JumpPointerIndex = -2; // Must be a negative value.
internal const int DynamicPointerIndex = -3; // Must be a negative value.
internal const int CountTableIndex = -4; // Must be a negative value.
internal static readonly Symbol PageTableSymbol = new(SymbolType.Special, 1);
internal static readonly Symbol CountTableSymbol = new(SymbolType.Special, 2);
internal static readonly Symbol DispatchStubSymbol = new(SymbolType.Special, 3);
private const byte FillingByte = 0x00;
private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest;
@ -59,8 +58,6 @@ namespace ARMeilleure.Translation.PTC
private static bool _disposed;
internal static PtcJumpTable PtcJumpTable { get; private set; }
internal static string TitleIdText { get; private set; }
internal static string DisplayVersion { get; private set; }
@ -89,8 +86,6 @@ namespace ARMeilleure.Translation.PTC
_disposed = false;
PtcJumpTable = new PtcJumpTable();
TitleIdText = TitleIdTextDefault;
DisplayVersion = DisplayVersionDefault;
@ -348,20 +343,8 @@ namespace ARMeilleure.Translation.PTC
return false;
}
ReadOnlySpan<byte> ptcJumpTableBytes = new(stream.PositionPointer, innerHeader.PtcJumpTableLength);
stream.Seek(innerHeader.PtcJumpTableLength, SeekOrigin.Current);
Debug.Assert(stream.Position == stream.Length);
Hash128 ptcJumpTableHash = XXHash128.ComputeHash(ptcJumpTableBytes);
if (innerHeader.PtcJumpTableHash != ptcJumpTableHash)
{
InvalidateCompressedStream(compressedStream);
return false;
}
stream.Seek((long)Unsafe.SizeOf<InnerHeader>(), SeekOrigin.Begin);
_infosStream.Write(infosBytes);
@ -375,8 +358,6 @@ namespace ARMeilleure.Translation.PTC
_unwindInfosStream.Write(unwindInfosBytes);
stream.Seek(innerHeader.UnwindInfosLength, SeekOrigin.Current);
PtcJumpTable = PtcJumpTable.Deserialize(stream);
Debug.Assert(stream.Position == stream.Length);
}
}
@ -422,7 +403,6 @@ namespace ARMeilleure.Translation.PTC
finally
{
ResetCarriersIfNeeded();
PtcJumpTable.ClearIfNeeded();
GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;
}
@ -442,7 +422,6 @@ namespace ARMeilleure.Translation.PTC
innerHeader.CodesLength = _codesList.Length();
innerHeader.RelocsLength = (int)_relocsStream.Length;
innerHeader.UnwindInfosLength = (int)_unwindInfosStream.Length;
innerHeader.PtcJumpTableLength = PtcJumpTable.GetSerializeSize(PtcJumpTable);
OuterHeader outerHeader = new OuterHeader();
@ -459,8 +438,7 @@ namespace ARMeilleure.Translation.PTC
innerHeader.InfosLength +
innerHeader.CodesLength +
innerHeader.RelocsLength +
innerHeader.UnwindInfosLength +
innerHeader.PtcJumpTableLength;
innerHeader.UnwindInfosLength;
outerHeader.SetHeaderHash();
@ -486,16 +464,12 @@ namespace ARMeilleure.Translation.PTC
ReadOnlySpan<byte> unwindInfosBytes = new(stream.PositionPointer, innerHeader.UnwindInfosLength);
_unwindInfosStream.WriteTo(stream);
ReadOnlySpan<byte> ptcJumpTableBytes = new(stream.PositionPointer, innerHeader.PtcJumpTableLength);
PtcJumpTable.Serialize(stream, PtcJumpTable);
Debug.Assert(stream.Position == stream.Length);
innerHeader.InfosHash = XXHash128.ComputeHash(infosBytes);
innerHeader.CodesHash = XXHash128.ComputeHash(codesBytes);
innerHeader.RelocsHash = XXHash128.ComputeHash(relocsBytes);
innerHeader.UnwindInfosHash = XXHash128.ComputeHash(unwindInfosBytes);
innerHeader.PtcJumpTableHash = XXHash128.ComputeHash(ptcJumpTableBytes);
innerHeader.SetHeaderHash();
@ -505,7 +479,6 @@ namespace ARMeilleure.Translation.PTC
translatedFuncsCount = GetEntriesCount();
ResetCarriersIfNeeded();
PtcJumpTable.ClearIfNeeded();
using (FileStream compressedStream = new(fileName, FileMode.OpenOrCreate))
using (DeflateStream deflateStream = new(compressedStream, SaveCompressionLevel, true))
@ -545,11 +518,7 @@ namespace ARMeilleure.Translation.PTC
}
}
internal static void LoadTranslations(
ConcurrentDictionary<ulong, TranslatedFunction> funcs,
IMemoryManager memory,
JumpTable jumpTable,
EntryTable<uint> countTable)
internal static void LoadTranslations(Translator translator)
{
if (AreCarriersEmpty())
{
@ -580,7 +549,7 @@ namespace ARMeilleure.Translation.PTC
continue;
}
bool isEntryChanged = infoEntry.Hash != ComputeHash(memory, infoEntry.Address, infoEntry.GuestSize);
bool isEntryChanged = infoEntry.Hash != ComputeHash(translator.Memory, infoEntry.Address, infoEntry.GuestSize);
if (isEntryChanged || (!infoEntry.HighCq && PtcProfiler.ProfiledFuncs.TryGetValue(infoEntry.Address, out var value) && value.HighCq))
{
@ -594,8 +563,6 @@ namespace ARMeilleure.Translation.PTC
if (isEntryChanged)
{
PtcJumpTable.Clean(infoEntry.Address);
Logger.Info?.Print(LogClass.Ptc, $"Invalidated translated function (address: 0x{infoEntry.Address:X16})");
}
@ -610,14 +577,16 @@ namespace ARMeilleure.Translation.PTC
{
RelocEntry[] relocEntries = GetRelocEntries(relocsReader, infoEntry.RelocEntriesCount);
PatchCode(code, relocEntries, memory.PageTablePointer, jumpTable, countTable, out callCounter);
PatchCode(translator, code, relocEntries, out callCounter);
}
UnwindInfo unwindInfo = ReadUnwindInfo(unwindInfosReader);
TranslatedFunction func = FastTranslate(code, callCounter, infoEntry.GuestSize, unwindInfo, infoEntry.HighCq);
bool isAddressUnique = funcs.TryAdd(infoEntry.Address, func);
translator.RegisterFunction(infoEntry.Address, func);
bool isAddressUnique = translator.Functions.TryAdd(infoEntry.Address, func);
Debug.Assert(isAddressUnique, $"The address 0x{infoEntry.Address:X16} is not unique.");
}
@ -630,12 +599,7 @@ namespace ARMeilleure.Translation.PTC
throw new Exception("The length of a memory stream has changed, or its position has not reached or has exceeded its end.");
}
jumpTable.Initialize(PtcJumpTable, funcs);
PtcJumpTable.WriteJumpTable(jumpTable, funcs);
PtcJumpTable.WriteDynamicTable(jumpTable);
Logger.Info?.Print(LogClass.Ptc, $"{funcs.Count} translated functions loaded");
Logger.Info?.Print(LogClass.Ptc, $"{translator.Functions.Count} translated functions loaded");
}
private static int GetEntriesCount()
@ -676,56 +640,63 @@ namespace ARMeilleure.Translation.PTC
for (int i = 0; i < relocEntriesCount; i++)
{
int position = relocsReader.ReadInt32();
int index = relocsReader.ReadInt32();
SymbolType type = (SymbolType)relocsReader.ReadByte();
ulong value = relocsReader.ReadUInt64();
relocEntries[i] = new RelocEntry(position, index);
relocEntries[i] = new RelocEntry(position, new Symbol(type, value));
}
return relocEntries;
}
private static void PatchCode(
Span<byte> code,
RelocEntry[] relocEntries,
IntPtr pageTablePointer,
JumpTable jumpTable,
EntryTable<uint> countTable,
out Counter<uint> callCounter)
private static void PatchCode(Translator translator, Span<byte> code, RelocEntry[] relocEntries, out Counter<uint> callCounter)
{
callCounter = null;
foreach (RelocEntry relocEntry in relocEntries)
{
ulong imm;
IntPtr? imm = null;
Symbol symbol = relocEntry.Symbol;
if (relocEntry.Index == PageTablePointerIndex)
if (symbol.Type == SymbolType.FunctionTable)
{
imm = (ulong)pageTablePointer.ToInt64();
}
else if (relocEntry.Index == JumpPointerIndex)
{
imm = (ulong)jumpTable.JumpPointer.ToInt64();
}
else if (relocEntry.Index == DynamicPointerIndex)
{
imm = (ulong)jumpTable.DynamicPointer.ToInt64();
}
else if (relocEntry.Index == CountTableIndex)
{
callCounter = new Counter<uint>(countTable);
ulong guestAddress = symbol.Value;
unsafe { imm = (ulong)Unsafe.AsPointer(ref callCounter.Value); }
if (translator.FunctionTable.IsValid(guestAddress))
{
unsafe { imm = (IntPtr)Unsafe.AsPointer(ref translator.FunctionTable.GetValue(guestAddress)); }
}
}
else if (Delegates.TryGetDelegateFuncPtrByIndex(relocEntry.Index, out IntPtr funcPtr))
else if (symbol.Type == SymbolType.DelegateTable)
{
imm = (ulong)funcPtr.ToInt64();
int index = (int)symbol.Value;
if (Delegates.TryGetDelegateFuncPtrByIndex(index, out IntPtr funcPtr))
{
imm = funcPtr;
}
}
else
else if (symbol == PageTableSymbol)
{
imm = translator.Memory.PageTablePointer;
}
else if (symbol == CountTableSymbol)
{
callCounter = new Counter<uint>(translator.CountTable);
unsafe { imm = (IntPtr)Unsafe.AsPointer(ref callCounter.Value); }
}
else if (symbol == DispatchStubSymbol)
{
imm = translator.Stubs.DispatchStub;
}
if (imm == null)
{
throw new Exception($"Unexpected reloc entry {relocEntry}.");
}
BinaryPrimitives.WriteUInt64LittleEndian(code.Slice(relocEntry.Position, 8), imm);
BinaryPrimitives.WriteUInt64LittleEndian(code.Slice(relocEntry.Position, 8), (ulong)imm.Value);
}
}
@ -798,13 +769,9 @@ namespace ARMeilleure.Translation.PTC
}
}
internal static void MakeAndSaveTranslations(
ConcurrentDictionary<ulong, TranslatedFunction> funcs,
IMemoryManager memory,
JumpTable jumpTable,
EntryTable<uint> countTable)
internal static void MakeAndSaveTranslations(Translator translator)
{
var profiledFuncsToTranslate = PtcProfiler.GetProfiledFuncsToTranslate(funcs);
var profiledFuncsToTranslate = PtcProfiler.GetProfiledFuncsToTranslate(translator.Functions);
_translateCount = 0;
_translateTotalCount = profiledFuncsToTranslate.Count;
@ -814,7 +781,6 @@ namespace ARMeilleure.Translation.PTC
if (_translateTotalCount == 0 || degreeOfParallelism == 0)
{
ResetCarriersIfNeeded();
PtcJumpTable.ClearIfNeeded();
GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;
@ -844,19 +810,16 @@ namespace ARMeilleure.Translation.PTC
Debug.Assert(PtcProfiler.IsAddressInStaticCodeRange(address));
TranslatedFunction func = Translator.Translate(memory, jumpTable, countTable, address, item.funcProfile.Mode, item.funcProfile.HighCq);
TranslatedFunction func = translator.Translate(address, item.funcProfile.Mode, item.funcProfile.HighCq);
bool isAddressUnique = funcs.TryAdd(address, func);
bool isAddressUnique = translator.Functions.TryAdd(address, func);
Debug.Assert(isAddressUnique, $"The address 0x{address:X16} is not unique.");
if (func.HighCq)
{
jumpTable.RegisterFunction(address, func);
}
Interlocked.Increment(ref _translateCount);
translator.RegisterFunction(address, func);
if (State != PtcState.Enabled)
{
break;
@ -888,11 +851,6 @@ namespace ARMeilleure.Translation.PTC
Logger.Info?.Print(LogClass.Ptc, $"{_translateCount} of {_translateTotalCount} functions translated | Thread count: {degreeOfParallelism}");
PtcJumpTable.Initialize(jumpTable);
PtcJumpTable.ReadJumpTable(jumpTable);
PtcJumpTable.ReadDynamicTable(jumpTable);
Thread preSaveThread = new Thread(PreSave);
preSaveThread.IsBackground = true;
preSaveThread.Start();
@ -1022,13 +980,11 @@ namespace ARMeilleure.Translation.PTC
public long CodesLength;
public int RelocsLength;
public int UnwindInfosLength;
public int PtcJumpTableLength;
public Hash128 InfosHash;
public Hash128 CodesHash;
public Hash128 RelocsHash;
public Hash128 UnwindInfosHash;
public Hash128 PtcJumpTableHash;
public Hash128 HeaderHash;