Optimize texture format conversion, and MethodCopyBuffer (#1274)
* Improve performance when converting texture formats. Still more work to do.
* Speed up buffer -> texture copies. No longer copies byte by byte. Fast path when formats are identical.
* Fix a few things, 64 byte block fast copy.
* Spacing cleanup, unrelated change.
* Fix base offset calculation for region copies.
* Fix Linear -> BlockLinear
* Fix some nits. (part 1 of review feedback)
* Use a generic version of the Convert* functions rather than lambdas. This is some real monkey's paw shit.
* Remove unnecessary span constructor.
* Revert "Use a generic version of the Convert* functions rather than lambdas." This reverts commit aa43dcfbe8bba291eea4e10c68569af7a56a5851.
* Fix bug with rectangle destination writing, better rectangle calculation for linear textures.
This commit is contained in:
parent
ce983f360b
commit
bea1fc2e8d
5 changed files with 356 additions and 61 deletions
|
@ -1,6 +1,7 @@
|
|||
using Ryujinx.Graphics.Gpu.State;
|
||||
using Ryujinx.Graphics.Texture;
|
||||
using System;
|
||||
using System.Runtime.Intrinsics;
|
||||
|
||||
namespace Ryujinx.Graphics.Gpu.Engine
|
||||
{
|
||||
|
@ -56,19 +57,58 @@ namespace Ryujinx.Graphics.Gpu.Engine
|
|||
ulong srcBaseAddress = _context.MemoryManager.Translate(cbp.SrcAddress.Pack());
|
||||
ulong dstBaseAddress = _context.MemoryManager.Translate(cbp.DstAddress.Pack());
|
||||
|
||||
for (int y = 0; y < cbp.YCount; y++)
|
||||
for (int x = 0; x < cbp.XCount; x++)
|
||||
(int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, cbp.XCount, cbp.YCount);
|
||||
(int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, cbp.XCount, cbp.YCount);
|
||||
|
||||
ReadOnlySpan<byte> srcSpan = _context.PhysicalMemory.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize);
|
||||
Span<byte> dstSpan = _context.PhysicalMemory.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray();
|
||||
|
||||
bool completeSource = src.RegionX == 0 && src.RegionY == 0 && src.Width == cbp.XCount && src.Height == cbp.YCount;
|
||||
bool completeDest = dst.RegionX == 0 && dst.RegionY == 0 && dst.Width == cbp.XCount && dst.Height == cbp.YCount;
|
||||
|
||||
if (completeSource && completeDest && srcCalculator.LayoutMatches(dstCalculator))
|
||||
{
|
||||
int srcOffset = srcCalculator.GetOffset(src.RegionX + x, src.RegionY + y);
|
||||
int dstOffset = dstCalculator.GetOffset(dst.RegionX + x, dst.RegionY + y);
|
||||
|
||||
ulong srcAddress = srcBaseAddress + (ulong)srcOffset;
|
||||
ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
|
||||
|
||||
ReadOnlySpan<byte> pixel = _context.PhysicalMemory.GetSpan(srcAddress, srcBpp);
|
||||
|
||||
_context.PhysicalMemory.Write(dstAddress, pixel);
|
||||
srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.
|
||||
}
|
||||
else
|
||||
{
|
||||
// Copies a cbp.XCount x cbp.YCount rectangle of elements from srcSpan to dstSpan,
// one element of type T at a time. The source and destination byte offsets of each
// element come from srcCalculator and dstCalculator respectively, so the two sides
// may use different layouts.
//
// T selects the element size: each element is moved with a single T-sized load/store.
// dstSpan and srcSpan are expected to start at dstBaseOffset / srcBaseOffset within
// their layouts (see the base pointer adjustment below).
//
// Always returns true; the bool return exists so the call can be used as an arm of
// the srcBpp switch expression that invokes it.
unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
|
||||
{
|
||||
// Pin both spans so raw pointers into them stay valid for the duration of the copy.
fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
|
||||
{
|
||||
byte* dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
|
||||
// Same adjustment for the source: srcBase + (calculator offset) lands inside srcSpan.
byte* srcBase = srcPtr - srcBaseOffset;
|
||||
|
||||
for (int y = 0; y < cbp.YCount; y++)
|
||||
{
|
||||
// Select the current row on both calculators once per row, before the inner X loop.
// NOTE(review): presumably this caches the Y-dependent part of the offset - confirm in the calculator.
srcCalculator.SetY(src.RegionY + y);
|
||||
dstCalculator.SetY(dst.RegionY + y);
|
||||
|
||||
for (int x = 0; x < cbp.XCount; x++)
|
||||
{
|
||||
// Offsets for column x within the row selected by SetY above.
int srcOffset = srcCalculator.GetOffset(src.RegionX + x);
|
||||
int dstOffset = dstCalculator.GetOffset(dst.RegionX + x);
|
||||
|
||||
// Move one whole element of type T with a single assignment.
*(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Dispatch to the Convert<T> instantiation whose element type has the same size in
// bytes as srcBpp (1 => byte ... 16 => Vector128<byte>). The bool result is discarded;
// it only exists so the calls can be used as switch-expression arms.
// Any other size is not supported and throws NotSupportedException.
bool _ = srcBpp switch
|
||||
{
|
||||
1 => Convert<byte>(dstSpan, srcSpan),
|
||||
2 => Convert<ushort>(dstSpan, srcSpan),
|
||||
4 => Convert<uint>(dstSpan, srcSpan),
|
||||
8 => Convert<ulong>(dstSpan, srcSpan),
|
||||
12 => Convert<Bpp12Pixel>(dstSpan, srcSpan),
|
||||
16 => Convert<Vector128<byte>>(dstSpan, srcSpan),
|
||||
// C# interpolation holes are written {expr}; a leading '$' would be emitted literally in the message.
_ => throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.")
|
||||
};
|
||||
}
|
||||
|
||||
_context.PhysicalMemory.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue