Implement S8D24 texture format and tweak depth range detection (#2458)
This commit is contained in:
parent
79becc4b78
commit
e5ad1dfa48
10 changed files with 196 additions and 30 deletions
149
Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs
Normal file
149
Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs
Normal file
|
@ -0,0 +1,149 @@
|
|||
using System;
|
||||
using System.Numerics;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
|
||||
namespace Ryujinx.Graphics.OpenGL.Image
|
||||
{
|
||||
static class FormatConverter
|
||||
{
|
||||
public unsafe static byte[] ConvertS8D24ToD24S8(ReadOnlySpan<byte> data)
|
||||
{
|
||||
byte[] output = new byte[data.Length];
|
||||
|
||||
int start = 0;
|
||||
|
||||
if (Avx2.IsSupported)
|
||||
{
|
||||
var mask = Vector256.Create(
|
||||
(byte)3, (byte)0, (byte)1, (byte)2,
|
||||
(byte)7, (byte)4, (byte)5, (byte)6,
|
||||
(byte)11, (byte)8, (byte)9, (byte)10,
|
||||
(byte)15, (byte)12, (byte)13, (byte)14,
|
||||
(byte)19, (byte)16, (byte)17, (byte)18,
|
||||
(byte)23, (byte)20, (byte)21, (byte)22,
|
||||
(byte)27, (byte)24, (byte)25, (byte)26,
|
||||
(byte)31, (byte)28, (byte)29, (byte)30);
|
||||
|
||||
int sizeAligned = data.Length & ~31;
|
||||
|
||||
fixed (byte* pInput = data, pOutput = output)
|
||||
{
|
||||
for (uint i = 0; i < sizeAligned; i += 32)
|
||||
{
|
||||
var dataVec = Avx.LoadVector256(pInput + i);
|
||||
|
||||
dataVec = Avx2.Shuffle(dataVec, mask);
|
||||
|
||||
Avx.Store(pOutput + i, dataVec);
|
||||
}
|
||||
}
|
||||
|
||||
start = sizeAligned;
|
||||
}
|
||||
else if (Ssse3.IsSupported)
|
||||
{
|
||||
var mask = Vector128.Create(
|
||||
(byte)3, (byte)0, (byte)1, (byte)2,
|
||||
(byte)7, (byte)4, (byte)5, (byte)6,
|
||||
(byte)11, (byte)8, (byte)9, (byte)10,
|
||||
(byte)15, (byte)12, (byte)13, (byte)14);
|
||||
|
||||
int sizeAligned = data.Length & ~15;
|
||||
|
||||
fixed (byte* pInput = data, pOutput = output)
|
||||
{
|
||||
for (uint i = 0; i < sizeAligned; i += 16)
|
||||
{
|
||||
var dataVec = Sse2.LoadVector128(pInput + i);
|
||||
|
||||
dataVec = Ssse3.Shuffle(dataVec, mask);
|
||||
|
||||
Sse2.Store(pOutput + i, dataVec);
|
||||
}
|
||||
}
|
||||
|
||||
start = sizeAligned;
|
||||
}
|
||||
|
||||
var outSpan = MemoryMarshal.Cast<byte, uint>(output);
|
||||
var dataSpan = MemoryMarshal.Cast<byte, uint>(data);
|
||||
for (int i = start / sizeof(uint); i < dataSpan.Length; i++)
|
||||
{
|
||||
outSpan[i] = BitOperations.RotateLeft(dataSpan[i], 8);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
public unsafe static byte[] ConvertD24S8ToS8D24(ReadOnlySpan<byte> data)
|
||||
{
|
||||
byte[] output = new byte[data.Length];
|
||||
|
||||
int start = 0;
|
||||
|
||||
if (Avx2.IsSupported)
|
||||
{
|
||||
var mask = Vector256.Create(
|
||||
(byte)1, (byte)2, (byte)3, (byte)0,
|
||||
(byte)5, (byte)6, (byte)7, (byte)4,
|
||||
(byte)9, (byte)10, (byte)11, (byte)8,
|
||||
(byte)13, (byte)14, (byte)15, (byte)12,
|
||||
(byte)17, (byte)18, (byte)19, (byte)16,
|
||||
(byte)21, (byte)22, (byte)23, (byte)20,
|
||||
(byte)25, (byte)26, (byte)27, (byte)24,
|
||||
(byte)29, (byte)30, (byte)31, (byte)28);
|
||||
|
||||
int sizeAligned = data.Length & ~31;
|
||||
|
||||
fixed (byte* pInput = data, pOutput = output)
|
||||
{
|
||||
for (uint i = 0; i < sizeAligned; i += 32)
|
||||
{
|
||||
var dataVec = Avx.LoadVector256(pInput + i);
|
||||
|
||||
dataVec = Avx2.Shuffle(dataVec, mask);
|
||||
|
||||
Avx.Store(pOutput + i, dataVec);
|
||||
}
|
||||
}
|
||||
|
||||
start = sizeAligned;
|
||||
}
|
||||
else if (Ssse3.IsSupported)
|
||||
{
|
||||
var mask = Vector128.Create(
|
||||
(byte)1, (byte)2, (byte)3, (byte)0,
|
||||
(byte)5, (byte)6, (byte)7, (byte)4,
|
||||
(byte)9, (byte)10, (byte)11, (byte)8,
|
||||
(byte)13, (byte)14, (byte)15, (byte)12);
|
||||
|
||||
int sizeAligned = data.Length & ~15;
|
||||
|
||||
fixed (byte* pInput = data, pOutput = output)
|
||||
{
|
||||
for (uint i = 0; i < sizeAligned; i += 16)
|
||||
{
|
||||
var dataVec = Sse2.LoadVector128(pInput + i);
|
||||
|
||||
dataVec = Ssse3.Shuffle(dataVec, mask);
|
||||
|
||||
Sse2.Store(pOutput + i, dataVec);
|
||||
}
|
||||
}
|
||||
|
||||
start = sizeAligned;
|
||||
}
|
||||
|
||||
var outSpan = MemoryMarshal.Cast<byte, uint>(output);
|
||||
var dataSpan = MemoryMarshal.Cast<byte, uint>(data);
|
||||
for (int i = start / sizeof(uint); i < dataSpan.Length; i++)
|
||||
{
|
||||
outSpan[i] = BitOperations.RotateRight(dataSpan[i], 8);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue