Clear JIT cache on exit (#1518)

* Initial cache memory allocator implementation

* Get rid of CallFlag

* Perform cache cleanup on exit

* Basic cache invalidation

* That's not how conditionals work in C#, it seems

* Set PTC version to PR number

* Address PR feedback

* Update InstEmitFlowHelper.cs

* Flag clear on address is no longer needed

* Do not include exit block in function size calculation

* Dispose jump table

* For future use

* InternalVersion = 1519 (force retest).

Co-authored-by: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
This commit is contained in:
gdkchan 2020-12-16 17:07:42 -03:00 committed by GitHub
parent 11222516c4
commit 61634dd415
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 827 additions and 357 deletions

View file

@ -0,0 +1,26 @@
using ARMeilleure.CodeGen.Unwinding;
using System;
using System.Diagnostics.CodeAnalysis;
namespace ARMeilleure.Translation.Cache
{
/// <summary>
/// Record describing one region tracked by the JIT cache: its offset, its size in bytes
/// and the unwind information that belongs to it.
/// Entries are ordered by <see cref="Offset"/> only, which lets them be kept in a sorted
/// list and looked up with a binary search.
/// </summary>
struct CacheEntry : IComparable<CacheEntry>
{
    /// <summary>Offset of the entry; this is the sort key.</summary>
    public int Offset { get; }

    /// <summary>Size of the entry, as supplied by the creator of the entry.</summary>
    public int Size { get; }

    /// <summary>Unwind information associated with the entry.</summary>
    public UnwindInfo UnwindInfo { get; }

    /// <summary>
    /// Creates a new entry from its three components.
    /// </summary>
    /// <param name="offset">Offset of the entry</param>
    /// <param name="size">Size of the entry</param>
    /// <param name="unwindInfo">Unwind information of the entry</param>
    public CacheEntry(int offset, int size, UnwindInfo unwindInfo)
    {
        UnwindInfo = unwindInfo;
        Size = size;
        Offset = offset;
    }

    /// <summary>
    /// Orders two entries by their offsets; size and unwind information do not participate.
    /// </summary>
    /// <param name="other">Entry to compare against</param>
    /// <returns>Negative, zero or positive, following <see cref="int.CompareTo(int)"/> on the offsets</returns>
    public int CompareTo([AllowNull] CacheEntry other) => Offset.CompareTo(other.Offset);
}
}

View file

@ -0,0 +1,96 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
namespace ARMeilleure.Translation.Cache
{
/// <summary>
/// First-fit allocator that hands out integer offsets from a fixed-capacity range.
/// Free space is tracked as a list of blocks sorted by offset; adjacent free blocks are
/// merged when memory is returned.
/// </summary>
class CacheMemoryAllocator
{
    /// <summary>
    /// A contiguous free range, ordered by its starting offset.
    /// </summary>
    private struct MemoryBlock : IComparable<MemoryBlock>
    {
        public int Offset { get; }
        public int Size { get; }

        public MemoryBlock(int offset, int size)
        {
            Size = size;
            Offset = offset;
        }

        public int CompareTo([AllowNull] MemoryBlock other) => Offset.CompareTo(other.Offset);
    }

    // Free ranges, kept sorted by offset so BinarySearch can be used on insertion.
    private readonly List<MemoryBlock> _blocks = new List<MemoryBlock>();

    /// <summary>
    /// Creates an allocator whose whole range [0, capacity) is initially free.
    /// </summary>
    /// <param name="capacity">Total number of bytes managed by the allocator</param>
    public CacheMemoryAllocator(int capacity)
    {
        _blocks.Add(new MemoryBlock(0, capacity));
    }

    /// <summary>
    /// Reserves <paramref name="size"/> bytes from the first free block that is large enough.
    /// </summary>
    /// <param name="size">Number of bytes to reserve</param>
    /// <returns>Offset of the reserved range, or -1 when no free block can hold it</returns>
    public int Allocate(int size)
    {
        for (int position = 0; position < _blocks.Count; position++)
        {
            MemoryBlock candidate = _blocks[position];

            if (candidate.Size < size)
            {
                // Too small, keep looking.
                continue;
            }

            if (candidate.Size == size)
            {
                // Exact fit, the free block disappears entirely.
                _blocks.RemoveAt(position);
            }
            else
            {
                // Carve the allocation from the front and keep the tail as free space.
                _blocks[position] = new MemoryBlock(candidate.Offset + size, candidate.Size - size);
            }

            return candidate.Offset;
        }

        // We don't have enough free memory to perform the allocation.
        return -1;
    }

    /// <summary>
    /// Returns a previously allocated range to the free list.
    /// </summary>
    /// <param name="offset">Offset of the range being released</param>
    /// <param name="size">Size in bytes of the range being released</param>
    public void Free(int offset, int size)
    {
        Insert(new MemoryBlock(offset, size));
    }

    /// <summary>
    /// Adds a free block to the sorted list, coalescing it with the free block that starts
    /// right after it and/or the free block that ends right before it.
    /// </summary>
    /// <param name="block">Free block to insert</param>
    private void Insert(MemoryBlock block)
    {
        int slot = _blocks.BinarySearch(block);

        if (slot < 0)
        {
            // Not found: the complement is the position where the block belongs.
            slot = ~slot;
        }

        if (slot < _blocks.Count)
        {
            MemoryBlock following = _blocks[slot];

            if (block.Offset + block.Size == following.Offset)
            {
                // The new block ends where the next one starts: absorb the next one.
                block = new MemoryBlock(block.Offset, block.Size + following.Size);
                _blocks.RemoveAt(slot);
            }
        }

        if (slot > 0)
        {
            MemoryBlock preceding = _blocks[slot - 1];

            if (preceding.Offset + preceding.Size == block.Offset)
            {
                // The previous block ends where the new one starts: extend backwards over it.
                block = new MemoryBlock(block.Offset - preceding.Size, block.Size + preceding.Size);
                slot--;
                _blocks.RemoveAt(slot);
            }
        }

        _blocks.Insert(slot, block);
    }
}
}

View file

@ -0,0 +1,182 @@
using ARMeilleure.CodeGen;
using ARMeilleure.CodeGen.Unwinding;
using ARMeilleure.Memory;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.InteropServices;
namespace ARMeilleure.Translation.Cache
{
/// <summary>
/// Process-wide cache holding the machine code produced by the JIT.
/// Code is copied into a single reserved region; a <see cref="CacheMemoryAllocator"/> decides
/// where each function lives, and a sorted list of <see cref="CacheEntry"/> records what is
/// currently mapped so it can be looked up by offset and released later.
/// </summary>
static class JitCache
{
    private const int PageSize = 4 * 1024;
    private const int PageMask = PageSize - 1;

    private const int CodeAlignment = 4; // Bytes.
    private const int CacheSize = 2047 * 1024 * 1024;

    private static ReservedRegion _jitRegion;
    private static CacheMemoryAllocator _cacheAllocator;

    // Mapped functions, sorted by offset (see Add).
    private static readonly List<CacheEntry> _cacheEntries = new List<CacheEntry>();

    // Guards the allocator, the entry list and the initialization.
    private static readonly object _lock = new object();
    private static bool _initialized;

    /// <summary>
    /// Reserves the code region and sets up the allocator. Calls after the first one are no-ops.
    /// On Windows, one page is additionally allocated from the cache and handed to
    /// <see cref="JitUnwindWindows"/> as its work buffer.
    /// </summary>
    /// <param name="allocator">Allocator used to reserve the JIT memory region</param>
    public static void Initialize(IJitMemoryAllocator allocator)
    {
        if (_initialized) return;

        lock (_lock)
        {
            if (_initialized) return;

            _jitRegion = new ReservedRegion(allocator, CacheSize);

            _cacheAllocator = new CacheMemoryAllocator(CacheSize);

            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                JitUnwindWindows.InstallFunctionTableHandler(_jitRegion.Pointer, CacheSize, _jitRegion.Pointer + Allocate(PageSize));
            }

            _initialized = true;
        }
    }

    /// <summary>
    /// Copies the code of a compiled function into the cache and registers it.
    /// </summary>
    /// <param name="func">Compiled function to place in the cache</param>
    /// <returns>Pointer to the start of the function inside the cache</returns>
    /// <exception cref="OutOfMemoryException">The cache has no free range large enough</exception>
    public static IntPtr Map(CompiledFunction func)
    {
        byte[] code = func.Code;

        lock (_lock)
        {
            Debug.Assert(_initialized);

            int funcOffset = Allocate(code.Length);

            IntPtr funcPtr = _jitRegion.Pointer + funcOffset;

            // The pages are made writable only for the duration of the copy.
            ReprotectAsWritable(funcOffset, code.Length);

            Marshal.Copy(code, 0, funcPtr, code.Length);

            ReprotectAsExecutable(funcOffset, code.Length);

            Add(funcOffset, code.Length, func.UnwindInfo);

            return funcPtr;
        }
    }

    /// <summary>
    /// Releases the function that starts at <paramref name="pointer"/>, returning its memory
    /// to the allocator and dropping its cache entry.
    /// </summary>
    /// <remarks>
    /// The pointer must be the exact start of a mapped function (a value returned by
    /// <see cref="Map"/>). Anything else trips a debug assertion and is otherwise ignored, so
    /// that a stray pointer can never free or unregister an unrelated function.
    /// </remarks>
    /// <param name="pointer">Pointer previously returned by <see cref="Map"/></param>
    public static void Unmap(IntPtr pointer)
    {
        lock (_lock)
        {
            Debug.Assert(_initialized);

            int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64());

            int index = FindEntryIndex(funcOffset);

            // FindEntryIndex returns the closest entry at or below the offset, so the match
            // has to be verified before anything is released.
            bool isFunctionStart = index >= 0 && _cacheEntries[index].Offset == funcOffset;
            Debug.Assert(isFunctionStart);

            if (isFunctionStart)
            {
                _cacheAllocator.Free(funcOffset, AlignCodeSize(_cacheEntries[index].Size));

                _cacheEntries.RemoveAt(index);
            }
        }
    }

    /// <summary>
    /// Maps every page touched by the range [offset, offset + size) as read/write/execute.
    /// </summary>
    private static void ReprotectAsWritable(int offset, int size)
    {
        int endOffs = offset + size;

        // Round the range outwards to page boundaries.
        int regionStart = offset & ~PageMask;
        int regionEnd = (endOffs + PageMask) & ~PageMask;

        _jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart));
    }

    /// <summary>
    /// Maps every page touched by the range [offset, offset + size) as read/execute.
    /// </summary>
    private static void ReprotectAsExecutable(int offset, int size)
    {
        int endOffs = offset + size;

        // Round the range outwards to page boundaries.
        int regionStart = offset & ~PageMask;
        int regionEnd = (endOffs + PageMask) & ~PageMask;

        _jitRegion.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart));
    }

    /// <summary>
    /// Allocates an aligned range from the cache and makes sure the region is expanded to cover it.
    /// </summary>
    /// <param name="codeSize">Requested size in bytes (rounded up to the code alignment)</param>
    /// <returns>Offset of the allocated range inside the JIT region</returns>
    /// <exception cref="OutOfMemoryException">The cache has no free range large enough</exception>
    private static int Allocate(int codeSize)
    {
        codeSize = AlignCodeSize(codeSize);

        int allocOffset = _cacheAllocator.Allocate(codeSize);

        if (allocOffset < 0)
        {
            throw new OutOfMemoryException("JIT Cache exhausted.");
        }

        _jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize);

        return allocOffset;
    }

    /// <summary>
    /// Rounds a size up to the next multiple of the code alignment (overflow checked).
    /// </summary>
    private static int AlignCodeSize(int codeSize)
    {
        return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
    }

    /// <summary>
    /// Registers a mapped function, keeping the entry list sorted by offset.
    /// </summary>
    private static void Add(int offset, int size, UnwindInfo unwindInfo)
    {
        CacheEntry entry = new CacheEntry(offset, size, unwindInfo);

        int index = _cacheEntries.BinarySearch(entry);

        if (index < 0)
        {
            index = ~index;
        }

        _cacheEntries.Insert(index, entry);
    }

    /// <summary>
    /// Looks up the entry whose offset is the greatest one not above <paramref name="offset"/>.
    /// </summary>
    /// <remarks>
    /// The size of the entry is not taken into account: the caller is responsible for checking
    /// that the offset actually falls inside the returned entry if that matters.
    /// </remarks>
    /// <param name="offset">Offset inside the JIT region</param>
    /// <param name="entry">Entry found, or default when there is none</param>
    /// <returns>True when an entry at or below the offset exists</returns>
    public static bool TryFind(int offset, out CacheEntry entry)
    {
        lock (_lock)
        {
            int index = FindEntryIndex(offset);

            if (index >= 0)
            {
                entry = _cacheEntries[index];
                return true;
            }
        }

        entry = default;
        return false;
    }

    /// <summary>
    /// Binary searches the entry list. Must be called with <see cref="_lock"/> held.
    /// </summary>
    /// <param name="offset">Offset inside the JIT region</param>
    /// <returns>
    /// Index of the entry starting exactly at the offset, otherwise the index of the closest
    /// entry below it, or -1 when every entry starts above the offset
    /// </returns>
    private static int FindEntryIndex(int offset)
    {
        int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default));

        // On a miss, ~index is the insertion point; the entry before it is the candidate.
        return index >= 0 ? index : ~index - 1;
    }
}
}

View file

@ -0,0 +1,188 @@
// https://github.com/MicrosoftDocs/cpp-docs/blob/master/docs/build/exception-handling-x64.md
using ARMeilleure.CodeGen.Unwinding;
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;
namespace ARMeilleure.Translation.Cache
{
/// <summary>
/// Windows x64 unwinding support for the JIT cache.
/// A callback is registered with the OS for the whole code cache range; when invoked, it
/// builds the unwind data for the function found at the requested address into a work buffer.
/// </summary>
static class JitUnwindWindows
{
    private const int MaxUnwindCodesArraySize = 32; // Must be an even value.

    // Function table entry handed back to the OS. The fields are offsets relative to the
    // base address given on registration (see how they are filled in FunctionTableHandler).
    private struct RuntimeFunction
    {
        public uint BeginAddress;
        public uint EndAddress;
        public uint UnwindData;
    }

    // Unwind data header followed by a fixed-capacity array of packed unwind codes.
    private struct UnwindInfo
    {
        public byte VersionAndFlags;
        public byte SizeOfProlog;
        public byte CountOfUnwindCodes;
        public byte FrameRegister;
        public unsafe fixed ushort UnwindCodes[MaxUnwindCodesArraySize];
    }

    // Unwind operation codes, as packed by PackUnwindOp.
    private enum UnwindOp
    {
        PushNonvol = 0,
        AllocLarge = 1,
        AllocSmall = 2,
        SetFpreg = 3,
        SaveNonvol = 4,
        SaveNonvolFar = 5,
        SaveXmm128 = 8,
        SaveXmm128Far = 9,
        PushMachframe = 10
    }

    private unsafe delegate RuntimeFunction* GetRuntimeFunctionCallback(ulong controlPc, IntPtr context);

    // The native function returns a 1-byte BOOLEAN, not a 4-byte BOOL, so the return value
    // must be marshalled as U1; with the default marshalling the upper bytes are undefined.
    [DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
    [return: MarshalAs(UnmanagedType.U1)]
    private static unsafe extern bool RtlInstallFunctionTableCallback(
        ulong tableIdentifier,
        ulong baseAddress,
        uint length,
        GetRuntimeFunctionCallback callback,
        IntPtr context,
        string outOfProcessCallbackDll);

    // Kept in a static field so the delegate passed to native code is not collected.
    private static GetRuntimeFunctionCallback _getRuntimeFunctionCallback;

    private static int _sizeOfRuntimeFunction;

    // Both point into the work buffer supplied to InstallFunctionTableHandler.
    private unsafe static RuntimeFunction* _runtimeFunction;

    private unsafe static UnwindInfo* _unwindInfo;

    /// <summary>
    /// Registers the function table callback for the code cache.
    /// </summary>
    /// <param name="codeCachePointer">Base address of the code cache</param>
    /// <param name="codeCacheLength">Length in bytes of the code cache</param>
    /// <param name="workBufferPtr">
    /// Buffer where the runtime function entry, immediately followed by the unwind info, is written
    /// </param>
    /// <exception cref="InvalidOperationException">The OS refused to install the callback</exception>
    public static void InstallFunctionTableHandler(IntPtr codeCachePointer, uint codeCacheLength, IntPtr workBufferPtr)
    {
        ulong codeCachePtr = (ulong)codeCachePointer.ToInt64();

        _sizeOfRuntimeFunction = Marshal.SizeOf<RuntimeFunction>();

        bool result;

        unsafe
        {
            _runtimeFunction = (RuntimeFunction*)workBufferPtr;

            _unwindInfo = (UnwindInfo*)(workBufferPtr + _sizeOfRuntimeFunction);

            _getRuntimeFunctionCallback = new GetRuntimeFunctionCallback(FunctionTableHandler);

            result = RtlInstallFunctionTableCallback(
                codeCachePtr | 3, // The two low-order bits of the table identifier must be set.
                codeCachePtr,
                codeCacheLength,
                _getRuntimeFunctionCallback,
                codeCachePointer,
                null);
        }

        if (!result)
        {
            throw new InvalidOperationException("Failure installing function table callback.");
        }
    }

    /// <summary>
    /// Callback invoked with an address inside the code cache. Looks up the function in
    /// <see cref="JitCache"/> and encodes its unwind information into the work buffer.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the work buffer is shared static state and is rewritten on every call;
    /// confirm whether concurrent invocations are possible.
    /// </remarks>
    /// <param name="controlPc">Address for which unwind information is requested</param>
    /// <param name="context">Context given on registration (the code cache base address)</param>
    /// <returns>Pointer to the filled runtime function entry, or null when no function is found</returns>
    private static unsafe RuntimeFunction* FunctionTableHandler(ulong controlPc, IntPtr context)
    {
        int offset = (int)((long)controlPc - context.ToInt64());

        if (!JitCache.TryFind(offset, out CacheEntry funcEntry))
        {
            return null; // Not found.
        }

        var unwindInfo = funcEntry.UnwindInfo;

        int codeIndex = 0;

        // The push entries are emitted in reverse order (last prolog operation first).
        for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--)
        {
            var entry = unwindInfo.PushEntries[index];

            switch (entry.PseudoOp)
            {
                case UnwindPseudoOp.SaveXmm128:
                {
                    int stackOffset = entry.StackOffsetOrAllocSize;

                    Debug.Assert(stackOffset % 16 == 0);

                    if (stackOffset <= 0xFFFF0)
                    {
                        // Short form: one extra slot holding the offset scaled by 16.
                        _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.SaveXmm128, entry.PrologOffset, entry.RegIndex);
                        _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset / 16);
                    }
                    else
                    {
                        // Far form: two extra slots holding the unscaled 32-bit offset.
                        _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.SaveXmm128Far, entry.PrologOffset, entry.RegIndex);
                        _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset >> 0);
                        _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset >> 16);
                    }

                    break;
                }

                case UnwindPseudoOp.AllocStack:
                {
                    int allocSize = entry.StackOffsetOrAllocSize;

                    Debug.Assert(allocSize % 8 == 0);

                    if (allocSize <= 128)
                    {
                        // Small allocation: size encoded in the op info as (size / 8) - 1.
                        _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocSmall, entry.PrologOffset, (allocSize / 8) - 1);
                    }
                    else if (allocSize <= 0x7FFF8)
                    {
                        // Large allocation, op info 0: one extra slot holding size / 8.
                        _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocLarge, entry.PrologOffset, 0);
                        _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize / 8);
                    }
                    else
                    {
                        // Large allocation, op info 1: two extra slots holding the unscaled size.
                        _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocLarge, entry.PrologOffset, 1);
                        _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize >> 0);
                        _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize >> 16);
                    }

                    break;
                }

                case UnwindPseudoOp.PushReg:
                {
                    _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.PushNonvol, entry.PrologOffset, entry.RegIndex);

                    break;
                }

                default: throw new NotImplementedException($"({nameof(entry.PseudoOp)} = {entry.PseudoOp})");
            }
        }

        Debug.Assert(codeIndex <= MaxUnwindCodesArraySize);

        _unwindInfo->VersionAndFlags = 1; // Flags: The function has no handler.
        _unwindInfo->SizeOfProlog = (byte)unwindInfo.PrologSize;
        _unwindInfo->CountOfUnwindCodes = (byte)codeIndex;
        _unwindInfo->FrameRegister = 0;

        _runtimeFunction->BeginAddress = (uint)funcEntry.Offset;
        _runtimeFunction->EndAddress = (uint)(funcEntry.Offset + funcEntry.Size);

        // The unwind info is stored right after the runtime function entry in the work buffer.
        _runtimeFunction->UnwindData = (uint)_sizeOfRuntimeFunction;

        return _runtimeFunction;
    }

    /// <summary>
    /// Packs an unwind code slot: prolog offset in the low byte, operation in bits 8-11 and
    /// operation info in bits 12-15.
    /// </summary>
    private static ushort PackUnwindOp(UnwindOp op, int prologOffset, int opInfo)
    {
        return (ushort)(prologOffset | ((int)op << 8) | (opInfo << 12));
    }
}
}

View file

@ -0,0 +1,268 @@
using ARMeilleure.Diagnostics;
using ARMeilleure.Memory;
using ARMeilleure.Translation.PTC;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.InteropServices;
namespace ARMeilleure.Translation.Cache
{
/// <summary>
/// Holds the direct jump table and the dynamic (indirect) table used by JIT compiled code,
/// together with the bookkeeping needed to patch or release their entries.
/// </summary>
class JumpTable : IDisposable
{
    // The jump table is a block of (guestAddress, hostAddress) function mappings.
    // Each entry corresponds to one branch in a JIT compiled function. The entries are
    // reserved specifically for each call.
    // The Dependants dictionary can be used to update the hostAddress for any functions that change.

    public const int JumpTableStride = 16; // 8 byte guest address, 8 byte host address.

    private const int JumpTableSize = 1048576;
    private const int JumpTableByteSize = JumpTableSize * JumpTableStride;

    // The dynamic table is also a block of (guestAddress, hostAddress) function mappings.
    // The main difference is that indirect calls and jumps reserve _multiple_ entries on the table.
    // These start out as all 0. When an indirect call is made, it tries to find the guest address on the table.

    // If we get to an empty address, the guestAddress is set to the call that we want.

    // If we get to a guestAddress that matches our own (or we just claimed it), the hostAddress is read.
    // If it is non-zero, we immediately branch or call the host function.
    // If it is 0, NativeInterface is called to find the rejited address of the call.
    // If none is found, the hostAddress entry stays at 0. Otherwise, the new address is placed in the entry.

    // If the table size is exhausted and we didn't find our desired address, we fall back to requesting
    // the function from the JIT.

    public const int DynamicTableElems = 1;

    public const int DynamicTableStride = DynamicTableElems * JumpTableStride;

    private const int DynamicTableSize = 1048576;
    private const int DynamicTableByteSize = DynamicTableSize * DynamicTableStride;

    // Tag bit marking an owner list item as an index into the dynamic table.
    private const int DynamicEntryTag = 1 << 31;

    private readonly ReservedRegion _jumpRegion;
    private readonly ReservedRegion _dynamicRegion;

    public IntPtr JumpPointer => _jumpRegion.Pointer;
    public IntPtr DynamicPointer => _dynamicRegion.Pointer;

    public JumpTableEntryAllocator Table { get; }
    public JumpTableEntryAllocator DynTable { get; }

    public ConcurrentDictionary<ulong, TranslatedFunction> Targets { get; }
    public ConcurrentDictionary<ulong, List<int>> Dependants { get; } // TODO: Attach to TranslatedFunction or a wrapper class.
    public ConcurrentDictionary<ulong, List<int>> Owners { get; }

    /// <summary>
    /// Reserves the memory of both tables and registers them as symbols.
    /// </summary>
    /// <param name="allocator">Allocator used to reserve the table regions</param>
    public JumpTable(IJitMemoryAllocator allocator)
    {
        _jumpRegion = new ReservedRegion(allocator, JumpTableByteSize);
        _dynamicRegion = new ReservedRegion(allocator, DynamicTableByteSize);

        Table = new JumpTableEntryAllocator();
        DynTable = new JumpTableEntryAllocator();

        Targets = new ConcurrentDictionary<ulong, TranslatedFunction>();
        Dependants = new ConcurrentDictionary<ulong, List<int>>();
        Owners = new ConcurrentDictionary<ulong, List<int>>();

        Symbols.Add((ulong)_jumpRegion.Pointer.ToInt64(), JumpTableByteSize, JumpTableStride, "JMP_TABLE");
        Symbols.Add((ulong)_dynamicRegion.Pointer.ToInt64(), DynamicTableByteSize, DynamicTableStride, "DYN_TABLE");
    }

    /// <summary>
    /// Populates the targets, dependants and owners from a PTC jump table.
    /// </summary>
    /// <param name="ptcJumpTable">Source PTC jump table</param>
    /// <param name="funcs">Translated functions, keyed by guest address</param>
    /// <exception cref="KeyNotFoundException">A target of the PTC table has no translated function</exception>
    public void Initialize(PtcJumpTable ptcJumpTable, ConcurrentDictionary<ulong, TranslatedFunction> funcs)
    {
        foreach (ulong guestAddress in ptcJumpTable.Targets)
        {
            if (funcs.TryGetValue(guestAddress, out TranslatedFunction func))
            {
                Targets.TryAdd(guestAddress, func);
            }
            else
            {
                throw new KeyNotFoundException($"({nameof(guestAddress)} = 0x{guestAddress:X16})");
            }
        }

        // The lists are copied, so later changes do not affect the PTC table's own lists.
        foreach (var item in ptcJumpTable.Dependants)
        {
            Dependants.TryAdd(item.Key, new List<int>(item.Value));
        }

        foreach (var item in ptcJumpTable.Owners)
        {
            Owners.TryAdd(item.Key, new List<int>(item.Value));
        }
    }

    /// <summary>
    /// Records the function translated for a guest address and patches every jump table
    /// entry that depends on that address with the new host pointer.
    /// </summary>
    /// <param name="address">Guest address of the function</param>
    /// <param name="func">Translated function</param>
    public void RegisterFunction(ulong address, TranslatedFunction func)
    {
        Targets.AddOrUpdate(address, func, (key, oldFunc) => func);
        long funcPtr = func.FuncPtr.ToInt64();

        // Update all jump table entries that target this address.
        if (Dependants.TryGetValue(address, out List<int> myDependants))
        {
            lock (myDependants)
            {
                foreach (int entry in myDependants)
                {
                    IntPtr addr = GetEntryAddressJumpTable(entry);

                    Marshal.WriteInt64(addr, 8, funcPtr);
                }
            }
        }
    }

    /// <summary>
    /// Reserves a direct jump table entry for a branch to <paramref name="address"/>.
    /// </summary>
    /// <param name="ownerGuestAddress">Guest address of the function that owns the entry</param>
    /// <param name="address">Guest address targeted by the branch</param>
    /// <param name="isJump">Selects which direct call stub is used while the target is unknown</param>
    /// <returns>Index of the reserved entry</returns>
    /// <exception cref="OutOfMemoryException">The jump table is exhausted</exception>
    public int ReserveTableEntry(ulong ownerGuestAddress, ulong address, bool isJump)
    {
        int entry = Table.AllocateEntry();

        ExpandIfNeededJumpTable(entry);

        // Is the address we have already registered? If so, put the function address in the jump table.
        // If not, it will point to the direct call stub.
        long value = DirectCallStubs.DirectCallStub(isJump).ToInt64();
        if (Targets.TryGetValue(address, out TranslatedFunction func))
        {
            value = func.FuncPtr.ToInt64();
        }

        // Make sure changes to the function at the target address update this jump table entry.
        List<int> targetDependants = Dependants.GetOrAdd(address, (addr) => new List<int>());
        lock (targetDependants)
        {
            targetDependants.Add(entry);
        }

        // Keep track of ownership for jump table entries.
        List<int> ownerEntries = Owners.GetOrAdd(ownerGuestAddress, (addr) => new List<int>());
        lock (ownerEntries)
        {
            ownerEntries.Add(entry);
        }

        IntPtr addr = GetEntryAddressJumpTable(entry);

        Marshal.WriteInt64(addr, 0, (long)address);
        Marshal.WriteInt64(addr, 8, value);

        return entry;
    }

    /// <summary>
    /// Reserves a dynamic table entry for an indirect call or jump.
    /// </summary>
    /// <param name="ownerGuestAddress">Guest address of the function that owns the entry</param>
    /// <param name="isJump">Selects which indirect call stub the entry initially points to</param>
    /// <returns>Index of the reserved entry</returns>
    /// <exception cref="OutOfMemoryException">The dynamic table is exhausted</exception>
    public int ReserveDynamicEntry(ulong ownerGuestAddress, bool isJump)
    {
        int entry = DynTable.AllocateEntry();

        ExpandIfNeededDynamicTable(entry);

        // Keep track of ownership for jump table entries.
        List<int> ownerEntries = Owners.GetOrAdd(ownerGuestAddress, (addr) => new List<int>());
        lock (ownerEntries)
        {
            ownerEntries.Add(entry | DynamicEntryTag);
        }

        // Initialize all host function pointers to the indirect call stub.
        IntPtr addr = GetEntryAddressDynamicTable(entry);
        long stubPtr = DirectCallStubs.IndirectCallStub(isJump).ToInt64();

        for (int i = 0; i < DynamicTableElems; i++)
        {
            Marshal.WriteInt64(addr, i * JumpTableStride + 8, stubPtr);
        }

        return entry;
    }

    // For future use.
    /// <summary>
    /// Clears and frees every table entry owned by the function at <paramref name="guestAddress"/>.
    /// </summary>
    /// <param name="guestAddress">Guest address of the owning function</param>
    public void RemoveFunctionEntries(ulong guestAddress)
    {
        if (Owners.TryRemove(guestAddress, out List<int> list))
        {
            // The reserve methods append to this list under its own lock; take the same lock
            // here so the list is never walked while another thread is adding to it.
            lock (list)
            {
                for (int i = 0; i < list.Count; i++)
                {
                    int entry = list[i];

                    bool isDynamic = (entry & DynamicEntryTag) != 0;

                    entry &= ~DynamicEntryTag;

                    if (isDynamic)
                    {
                        IntPtr addr = GetEntryAddressDynamicTable(entry);

                        for (int j = 0; j < DynamicTableElems; j++)
                        {
                            Marshal.WriteInt64(addr + j * JumpTableStride, 0, 0L);
                            Marshal.WriteInt64(addr + j * JumpTableStride, 8, 0L);
                        }

                        DynTable.FreeEntry(entry);
                    }
                    else
                    {
                        IntPtr addr = GetEntryAddressJumpTable(entry);

                        Marshal.WriteInt64(addr, 0, 0L);
                        Marshal.WriteInt64(addr, 8, 0L);

                        Table.FreeEntry(entry);
                    }
                }
            }
        }
    }

    /// <summary>
    /// Makes sure the jump table region is expanded far enough to contain <paramref name="entry"/>.
    /// </summary>
    /// <param name="entry">Index of the entry that is about to be used</param>
    /// <exception cref="OutOfMemoryException">The index is beyond the capacity of the table</exception>
    public void ExpandIfNeededJumpTable(int entry)
    {
        Debug.Assert(entry >= 0);

        if (entry < JumpTableSize)
        {
            _jumpRegion.ExpandIfNeeded((ulong)((entry + 1) * JumpTableStride));
        }
        else
        {
            throw new OutOfMemoryException("JIT Direct Jump Table exhausted.");
        }
    }

    /// <summary>
    /// Makes sure the dynamic table region is expanded far enough to contain <paramref name="entry"/>.
    /// </summary>
    /// <param name="entry">Index of the entry that is about to be used</param>
    /// <exception cref="OutOfMemoryException">The index is beyond the capacity of the table</exception>
    public void ExpandIfNeededDynamicTable(int entry)
    {
        Debug.Assert(entry >= 0);

        if (entry < DynamicTableSize)
        {
            _dynamicRegion.ExpandIfNeeded((ulong)((entry + 1) * DynamicTableStride));
        }
        else
        {
            throw new OutOfMemoryException("JIT Dynamic Jump Table exhausted.");
        }
    }

    /// <summary>
    /// Gets the address of a direct jump table entry.
    /// </summary>
    /// <param name="entry">Index of an allocated entry</param>
    /// <returns>Address of the entry (guest address first, host address 8 bytes later)</returns>
    public IntPtr GetEntryAddressJumpTable(int entry)
    {
        Debug.Assert(Table.EntryIsValid(entry));

        return _jumpRegion.Pointer + entry * JumpTableStride;
    }

    /// <summary>
    /// Gets the address of a dynamic table entry.
    /// </summary>
    /// <param name="entry">Index of an allocated entry</param>
    /// <returns>Address of the first element of the entry</returns>
    public IntPtr GetEntryAddressDynamicTable(int entry)
    {
        Debug.Assert(DynTable.EntryIsValid(entry));

        return _dynamicRegion.Pointer + entry * DynamicTableStride;
    }

    /// <summary>
    /// Disposes the regions backing both tables.
    /// </summary>
    public void Dispose()
    {
        _jumpRegion.Dispose();
        _dynamicRegion.Dispose();
    }
}
}

View file

@ -0,0 +1,72 @@
using ARMeilleure.Common;
using System.Collections.Generic;
using System.Diagnostics;
namespace ARMeilleure.Translation.Cache
{
/// <summary>
/// Hands out indices of jump table entries, using a bitmap to remember which ones are taken.
/// Every operation that touches the bitmap, except <see cref="GetEntries"/>, runs under a
/// lock on the bitmap.
/// </summary>
class JumpTableEntryAllocator
{
    private readonly BitMap _bitmap;

    // Index that is tried first on the next allocation.
    private int _freeHint;

    public JumpTableEntryAllocator()
    {
        _bitmap = new BitMap();
    }

    /// <summary>
    /// Checks whether an entry is currently marked as in use.
    /// </summary>
    /// <param name="entryIndex">Index of the entry</param>
    /// <returns>True when the bit of the entry is set</returns>
    public bool EntryIsValid(int entryIndex)
    {
        lock (_bitmap)
        {
            bool inUse = _bitmap.IsSet(entryIndex);

            return inUse;
        }
    }

    /// <summary>
    /// Marks a specific entry as in use.
    /// </summary>
    /// <param name="entryIndex">Index of the entry</param>
    public void SetEntry(int entryIndex)
    {
        lock (_bitmap)
        {
            _bitmap.Set(entryIndex);
        }
    }

    /// <summary>
    /// Picks a free entry, marks it as in use and returns its index.
    /// </summary>
    /// <returns>Index of the newly allocated entry</returns>
    public int AllocateEntry()
    {
        lock (_bitmap)
        {
            // Use the hinted index when it is free, otherwise scan for the first unset bit.
            int chosen = _bitmap.IsSet(_freeHint) ? _bitmap.FindFirstUnset() : _freeHint;

            _freeHint = chosen + 1;

            // Presumably Set reports whether the bit changed; the chosen bit must have been clear.
            bool wasSet = _bitmap.Set(chosen);
            Debug.Assert(wasSet);

            return chosen;
        }
    }

    /// <summary>
    /// Releases an entry and makes it the first candidate for the next allocation.
    /// </summary>
    /// <param name="entryIndex">Index of the entry to release</param>
    public void FreeEntry(int entryIndex)
    {
        lock (_bitmap)
        {
            _freeHint = entryIndex;
            _bitmap.Clear(entryIndex);
        }
    }

    /// <summary>
    /// Exposes the underlying bitmap as a sequence of entry indices (no lock is taken).
    /// </summary>
    public IEnumerable<int> GetEntries() => _bitmap;
}
}