Interrupt GPU command processing when a frame's fence is reached. (#1741)

* Interrupt GPU command processing when a frame's fence is reached.

* Accumulate times rather than %s

* Accurate timer for vsync

Spin-wait for the last 0.667 ms of a frame. This avoids issues caused by signalling vsync on a 16 ms interval (periodic stutters in SMO).

* Use event wait for better timing.

* Fix lazy wait

Windows doesn't seem to honor 1 ms waits consistently, so force a spin-wait if less than 2 ms remain.

* A bit more efficiency on frame waits.

Should now sometimes wait for a remainder of 0.6667 ms instead of 1.6667 ms (odd waits above 1 ms are reliable, unlike 1 ms waits).

* Better swap interval 0 solution

737 fps without breaking a sweat. Downside: vsync can no longer be disabled in games that use the event heavily (e.g. Link's Awakening, which is acceptable since it breaks anyway).

* Fix comment.

* Address Comments.
This commit is contained in:
riperiperi 2020-12-17 18:39:52 +00:00 committed by GitHub
parent eae39f80e7
commit 10aa11ce13
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 146 additions and 30 deletions

View file

@ -25,8 +25,12 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
private Stopwatch _chrono;
private ManualResetEvent _event = new ManualResetEvent(false);
private AutoResetEvent _nextFrameEvent = new AutoResetEvent(true);
private long _ticks;
private long _ticksPerFrame;
private long _spinTicks;
private long _1msTicks;
private int _swapInterval;
@ -61,8 +65,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
};
_chrono = new Stopwatch();
_chrono.Start();
_ticks = 0;
_spinTicks = Stopwatch.Frequency / 500;
_1msTicks = Stopwatch.Frequency / 1000;
UpdateSwapInterval(1);
@ -76,6 +83,7 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
// If the swap interval is 0, Game VSync is disabled.
if (_swapInterval == 0)
{
_nextFrameEvent.Set();
_ticksPerFrame = 1;
}
else
@ -129,6 +137,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
BufferQueueCore core = BufferQueue.CreateBufferQueue(_device, pid, out BufferQueueProducer producer, out BufferQueueConsumer consumer);
core.BufferQueued += () =>
{
_nextFrameEvent.Set();
};
_layers.Add(layerId, new Layer
{
ProducerBinderId = HOSBinderDriverServer.RegisterBinderObject(producer),
@ -189,23 +202,59 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
{
_isRunning = true;
long lastTicks = _chrono.ElapsedTicks;
while (_isRunning)
{
_ticks += _chrono.ElapsedTicks;
long ticks = _chrono.ElapsedTicks;
_chrono.Restart();
if (_ticks >= _ticksPerFrame)
if (_swapInterval == 0)
{
Compose();
_device.System?.SignalVsync();
_ticks = Math.Min(_ticks - _ticksPerFrame, _ticksPerFrame);
_nextFrameEvent.WaitOne(17);
lastTicks = ticks;
}
else
{
_ticks += ticks - lastTicks;
lastTicks = ticks;
// Sleep the minimal amount of time to avoid being too expensive.
Thread.Sleep(1);
if (_ticks >= _ticksPerFrame)
{
Compose();
_device.System?.SignalVsync();
// Apply a maximum bound of 3 frames to the tick remainder, in case some event causes Ryujinx to pause for a long time or messes with the timer.
_ticks = Math.Min(_ticks - _ticksPerFrame, _ticksPerFrame * 3);
}
// Sleep if possible. If the time til the next frame is too low, spin wait instead.
long diff = _ticksPerFrame - (_ticks + _chrono.ElapsedTicks - ticks);
if (diff > 0)
{
if (diff < _spinTicks)
{
do
{
// SpinWait is a little more HT/SMT friendly than aggressively updating/checking ticks.
// The value of 5 still gives us quite a bit of precision (~0.0003ms variance at worst) while waiting a reasonable amount of time.
Thread.SpinWait(5);
ticks = _chrono.ElapsedTicks;
_ticks += ticks - lastTicks;
lastTicks = ticks;
} while (_ticks < _ticksPerFrame);
}
else
{
_event.WaitOne((int)(diff / _1msTicks));
}
}
}
}
}
@ -299,6 +348,12 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
Item = item,
};
item.Fence.RegisterCallback(_device.Gpu, () =>
{
_device.Gpu.Window.SignalFrameReady();
_device.Gpu.GPFifo.Interrupt();
});
_device.Gpu.Window.EnqueueFrameThreadSafe(
frameBufferAddress,
frameBufferWidth,