Misc. CPU optimizations (#575)

* Add optimizations related to caller/callee saved registers, thread synchronization and disable tier 0

* Refactoring

* Add a config entry to enable or disable the reg load/store opt.

* Remove unnecessary register state stores for calls when the callee is know

* Rename IoType to VarType

* Enable tier 0 while fixing some perf issues related to tier 0

* Small tweak -- Compile before adding to the cache, to avoid lags

* Add required config entry
This commit is contained in:
gdkchan 2019-02-27 23:03:31 -03:00 committed by jduncanator
parent 884b4e5fd3
commit e21ebbf666
28 changed files with 456 additions and 280 deletions

View file

@ -63,48 +63,36 @@ namespace ChocolArm64.Translation
CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
}
TranslatedSub subroutine = GetOrTranslateSubroutine(state, position);
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{
sub = TranslateLowCq(position, state.GetExecutionMode());
}
position = subroutine.Execute(state, _memory);
position = sub.Execute(state, _memory);
}
while (position != 0 && state.Running);
state.CurrentTranslator = null;
}
internal void TranslateVirtualSubroutine(CpuThreadState state, long position)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0)
{
_queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
}
}
internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{
sub = TranslateLowCq(position, state.GetExecutionMode());
}
if (sub.Tier == TranslationTier.Tier0)
if (sub.IsWorthOptimizing())
{
_queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
bool isComplete = cs == CallType.Call ||
cs == CallType.VirtualCall;
_queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete);
}
return sub.Delegate;
}
internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
{
subroutine = TranslateLowCq(position, state.GetExecutionMode());
}
return subroutine;
}
private void TranslateQueuedSubs()
{
while (_threadCount != 0)
@ -124,7 +112,7 @@ namespace ChocolArm64.Translation
}
else
{
TranslateHighCq(item.Position, item.Mode);
TranslateHighCq(item.Position, item.Mode, item.IsComplete);
}
}
else
@ -142,14 +130,16 @@ namespace ChocolArm64.Translation
string subName = GetSubroutineName(position);
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName);
bool isAarch64 = mode == ExecutionMode.Aarch64;
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true);
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
}
private void TranslateHighCq(long position, ExecutionMode mode)
private TranslatedSub TranslateHighCq(long position, ExecutionMode mode, bool isComplete)
{
Block graph = Decoder.DecodeSubroutine(_memory, position, mode);
@ -159,9 +149,13 @@ namespace ChocolArm64.Translation
string subName = GetSubroutineName(position);
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName);
bool isAarch64 = mode == ExecutionMode.Aarch64;
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);
isComplete &= !context.HasIndirectJump;
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall);
int ilOpCount = 0;
@ -170,9 +164,11 @@ namespace ChocolArm64.Translation
ilOpCount += ilBlock.Count;
}
ForceAheadOfTimeCompilation(subroutine);
_cache.AddOrUpdate(position, subroutine, ilOpCount);
ForceAheadOfTimeCompilation(subroutine);
return subroutine;
}
private string GetSubroutineName(long position)