Rewrite shader decoding stage (#2698)

* Rewrite shader decoding stage

* Fix P2R constant buffer encoding

* Fix PSET/PSETP

* PR feedback

* Log unimplemented shader instructions

* Implement NOP

* Remove using

* PR feedback
This commit is contained in:
gdkchan 2021-10-12 17:35:31 -03:00 committed by GitHub
parent 0510fde25a
commit a7109c767b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
168 changed files with 12022 additions and 6388 deletions

View file

@ -0,0 +1,526 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Placeholder emitters for shader instructions that have no translation yet.
    /// Every method decodes the current opcode with the matching instruction type and then
    /// reports the missing implementation through the GPU accessor log.
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>Placeholder for AtomCas; only logs that the instruction is not implemented.</summary>
        public static void AtomCas(EmitterContext context)
        {
            _ = context.GetOp<InstAtomCas>();

            context.Config.GpuAccessor.Log("Shader instruction AtomCas is not implemented.");
        }

        /// <summary>Placeholder for AtomsCas; only logs that the instruction is not implemented.</summary>
        public static void AtomsCas(EmitterContext context)
        {
            _ = context.GetOp<InstAtomsCas>();

            context.Config.GpuAccessor.Log("Shader instruction AtomsCas is not implemented.");
        }

        /// <summary>Placeholder for B2r; only logs that the instruction is not implemented.</summary>
        public static void B2r(EmitterContext context)
        {
            _ = context.GetOp<InstB2r>();

            context.Config.GpuAccessor.Log("Shader instruction B2r is not implemented.");
        }

        /// <summary>Placeholder for Bpt; only logs that the instruction is not implemented.</summary>
        public static void Bpt(EmitterContext context)
        {
            _ = context.GetOp<InstBpt>();

            context.Config.GpuAccessor.Log("Shader instruction Bpt is not implemented.");
        }

        /// <summary>Placeholder for Cctl; only logs that the instruction is not implemented.</summary>
        public static void Cctl(EmitterContext context)
        {
            _ = context.GetOp<InstCctl>();

            context.Config.GpuAccessor.Log("Shader instruction Cctl is not implemented.");
        }

        /// <summary>Placeholder for Cctll; only logs that the instruction is not implemented.</summary>
        public static void Cctll(EmitterContext context)
        {
            _ = context.GetOp<InstCctll>();

            context.Config.GpuAccessor.Log("Shader instruction Cctll is not implemented.");
        }

        /// <summary>Placeholder for Cctlt; only logs that the instruction is not implemented.</summary>
        public static void Cctlt(EmitterContext context)
        {
            _ = context.GetOp<InstCctlt>();

            context.Config.GpuAccessor.Log("Shader instruction Cctlt is not implemented.");
        }

        /// <summary>Placeholder for Cont (decoded as InstContUnsup); only logs that it is not implemented.</summary>
        public static void Cont(EmitterContext context)
        {
            _ = context.GetOp<InstContUnsup>();

            context.Config.GpuAccessor.Log("Shader instruction ContUnsup is not implemented.");
        }

        /// <summary>Placeholder for Cset (decoded as InstCsetUnsup); only logs that it is not implemented.</summary>
        public static void Cset(EmitterContext context)
        {
            _ = context.GetOp<InstCsetUnsup>();

            context.Config.GpuAccessor.Log("Shader instruction CsetUnsup is not implemented.");
        }

        /// <summary>Placeholder for Cs2r; only logs that the instruction is not implemented.</summary>
        public static void Cs2r(EmitterContext context)
        {
            _ = context.GetOp<InstCs2r>();

            context.Config.GpuAccessor.Log("Shader instruction Cs2r is not implemented.");
        }

        /// <summary>Placeholder for DmnmxR; only logs that the instruction is not implemented.</summary>
        public static void DmnmxR(EmitterContext context)
        {
            _ = context.GetOp<InstDmnmxR>();

            context.Config.GpuAccessor.Log("Shader instruction DmnmxR is not implemented.");
        }

        /// <summary>Placeholder for DmnmxI; only logs that the instruction is not implemented.</summary>
        public static void DmnmxI(EmitterContext context)
        {
            _ = context.GetOp<InstDmnmxI>();

            context.Config.GpuAccessor.Log("Shader instruction DmnmxI is not implemented.");
        }

        /// <summary>Placeholder for DmnmxC; only logs that the instruction is not implemented.</summary>
        public static void DmnmxC(EmitterContext context)
        {
            _ = context.GetOp<InstDmnmxC>();

            context.Config.GpuAccessor.Log("Shader instruction DmnmxC is not implemented.");
        }

        /// <summary>Placeholder for DsetR; only logs that the instruction is not implemented.</summary>
        public static void DsetR(EmitterContext context)
        {
            _ = context.GetOp<InstDsetR>();

            context.Config.GpuAccessor.Log("Shader instruction DsetR is not implemented.");
        }

        /// <summary>Placeholder for DsetI; only logs that the instruction is not implemented.</summary>
        public static void DsetI(EmitterContext context)
        {
            _ = context.GetOp<InstDsetI>();

            context.Config.GpuAccessor.Log("Shader instruction DsetI is not implemented.");
        }

        /// <summary>Placeholder for DsetC; only logs that the instruction is not implemented.</summary>
        public static void DsetC(EmitterContext context)
        {
            _ = context.GetOp<InstDsetC>();

            context.Config.GpuAccessor.Log("Shader instruction DsetC is not implemented.");
        }

        /// <summary>Placeholder for DsetpR; only logs that the instruction is not implemented.</summary>
        public static void DsetpR(EmitterContext context)
        {
            _ = context.GetOp<InstDsetpR>();

            context.Config.GpuAccessor.Log("Shader instruction DsetpR is not implemented.");
        }

        /// <summary>Placeholder for DsetpI; only logs that the instruction is not implemented.</summary>
        public static void DsetpI(EmitterContext context)
        {
            _ = context.GetOp<InstDsetpI>();

            context.Config.GpuAccessor.Log("Shader instruction DsetpI is not implemented.");
        }

        /// <summary>Placeholder for DsetpC; only logs that the instruction is not implemented.</summary>
        public static void DsetpC(EmitterContext context)
        {
            _ = context.GetOp<InstDsetpC>();

            context.Config.GpuAccessor.Log("Shader instruction DsetpC is not implemented.");
        }

        /// <summary>Placeholder for FchkR; only logs that the instruction is not implemented.</summary>
        public static void FchkR(EmitterContext context)
        {
            _ = context.GetOp<InstFchkR>();

            context.Config.GpuAccessor.Log("Shader instruction FchkR is not implemented.");
        }

        /// <summary>Placeholder for FchkI; only logs that the instruction is not implemented.</summary>
        public static void FchkI(EmitterContext context)
        {
            _ = context.GetOp<InstFchkI>();

            context.Config.GpuAccessor.Log("Shader instruction FchkI is not implemented.");
        }

        /// <summary>Placeholder for FchkC; only logs that the instruction is not implemented.</summary>
        public static void FchkC(EmitterContext context)
        {
            _ = context.GetOp<InstFchkC>();

            context.Config.GpuAccessor.Log("Shader instruction FchkC is not implemented.");
        }

        /// <summary>Placeholder for Getcrsptr; only logs that the instruction is not implemented.</summary>
        public static void Getcrsptr(EmitterContext context)
        {
            _ = context.GetOp<InstGetcrsptr>();

            context.Config.GpuAccessor.Log("Shader instruction Getcrsptr is not implemented.");
        }

        /// <summary>Placeholder for Getlmembase; only logs that the instruction is not implemented.</summary>
        public static void Getlmembase(EmitterContext context)
        {
            _ = context.GetOp<InstGetlmembase>();

            context.Config.GpuAccessor.Log("Shader instruction Getlmembase is not implemented.");
        }

        /// <summary>Placeholder for Ide; only logs that the instruction is not implemented.</summary>
        public static void Ide(EmitterContext context)
        {
            _ = context.GetOp<InstIde>();

            context.Config.GpuAccessor.Log("Shader instruction Ide is not implemented.");
        }

        /// <summary>Placeholder for IdpR; only logs that the instruction is not implemented.</summary>
        public static void IdpR(EmitterContext context)
        {
            _ = context.GetOp<InstIdpR>();

            context.Config.GpuAccessor.Log("Shader instruction IdpR is not implemented.");
        }

        /// <summary>Placeholder for IdpC; only logs that the instruction is not implemented.</summary>
        public static void IdpC(EmitterContext context)
        {
            _ = context.GetOp<InstIdpC>();

            context.Config.GpuAccessor.Log("Shader instruction IdpC is not implemented.");
        }

        /// <summary>Placeholder for ImadspR; only logs that the instruction is not implemented.</summary>
        public static void ImadspR(EmitterContext context)
        {
            _ = context.GetOp<InstImadspR>();

            context.Config.GpuAccessor.Log("Shader instruction ImadspR is not implemented.");
        }

        /// <summary>Placeholder for ImadspI; only logs that the instruction is not implemented.</summary>
        public static void ImadspI(EmitterContext context)
        {
            _ = context.GetOp<InstImadspI>();

            context.Config.GpuAccessor.Log("Shader instruction ImadspI is not implemented.");
        }

        /// <summary>Placeholder for ImadspC; only logs that the instruction is not implemented.</summary>
        public static void ImadspC(EmitterContext context)
        {
            _ = context.GetOp<InstImadspC>();

            context.Config.GpuAccessor.Log("Shader instruction ImadspC is not implemented.");
        }

        /// <summary>Placeholder for ImadspRc; only logs that the instruction is not implemented.</summary>
        public static void ImadspRc(EmitterContext context)
        {
            _ = context.GetOp<InstImadspRc>();

            context.Config.GpuAccessor.Log("Shader instruction ImadspRc is not implemented.");
        }

        /// <summary>Placeholder for ImulR; only logs that the instruction is not implemented.</summary>
        public static void ImulR(EmitterContext context)
        {
            _ = context.GetOp<InstImulR>();

            context.Config.GpuAccessor.Log("Shader instruction ImulR is not implemented.");
        }

        /// <summary>Placeholder for ImulI; only logs that the instruction is not implemented.</summary>
        public static void ImulI(EmitterContext context)
        {
            _ = context.GetOp<InstImulI>();

            context.Config.GpuAccessor.Log("Shader instruction ImulI is not implemented.");
        }

        /// <summary>Placeholder for ImulC; only logs that the instruction is not implemented.</summary>
        public static void ImulC(EmitterContext context)
        {
            _ = context.GetOp<InstImulC>();

            context.Config.GpuAccessor.Log("Shader instruction ImulC is not implemented.");
        }

        /// <summary>Placeholder for Imul32i; only logs that the instruction is not implemented.</summary>
        public static void Imul32i(EmitterContext context)
        {
            _ = context.GetOp<InstImul32i>();

            context.Config.GpuAccessor.Log("Shader instruction Imul32i is not implemented.");
        }

        /// <summary>Placeholder for Jcal; only logs that the instruction is not implemented.</summary>
        public static void Jcal(EmitterContext context)
        {
            _ = context.GetOp<InstJcal>();

            context.Config.GpuAccessor.Log("Shader instruction Jcal is not implemented.");
        }

        /// <summary>Placeholder for Jmp; only logs that the instruction is not implemented.</summary>
        public static void Jmp(EmitterContext context)
        {
            _ = context.GetOp<InstJmp>();

            context.Config.GpuAccessor.Log("Shader instruction Jmp is not implemented.");
        }

        /// <summary>Placeholder for Jmx; only logs that the instruction is not implemented.</summary>
        public static void Jmx(EmitterContext context)
        {
            _ = context.GetOp<InstJmx>();

            context.Config.GpuAccessor.Log("Shader instruction Jmx is not implemented.");
        }

        /// <summary>Placeholder for Ld; only logs that the instruction is not implemented.</summary>
        public static void Ld(EmitterContext context)
        {
            _ = context.GetOp<InstLd>();

            context.Config.GpuAccessor.Log("Shader instruction Ld is not implemented.");
        }

        /// <summary>Placeholder for Lepc; only logs that the instruction is not implemented.</summary>
        public static void Lepc(EmitterContext context)
        {
            _ = context.GetOp<InstLepc>();

            context.Config.GpuAccessor.Log("Shader instruction Lepc is not implemented.");
        }

        /// <summary>Placeholder for Longjmp; only logs that the instruction is not implemented.</summary>
        public static void Longjmp(EmitterContext context)
        {
            _ = context.GetOp<InstLongjmp>();

            context.Config.GpuAccessor.Log("Shader instruction Longjmp is not implemented.");
        }

        /// <summary>Placeholder for P2rR; only logs that the instruction is not implemented.</summary>
        public static void P2rR(EmitterContext context)
        {
            _ = context.GetOp<InstP2rR>();

            context.Config.GpuAccessor.Log("Shader instruction P2rR is not implemented.");
        }

        /// <summary>Placeholder for P2rI; only logs that the instruction is not implemented.</summary>
        public static void P2rI(EmitterContext context)
        {
            _ = context.GetOp<InstP2rI>();

            context.Config.GpuAccessor.Log("Shader instruction P2rI is not implemented.");
        }

        /// <summary>Placeholder for P2rC; only logs that the instruction is not implemented.</summary>
        public static void P2rC(EmitterContext context)
        {
            _ = context.GetOp<InstP2rC>();

            context.Config.GpuAccessor.Log("Shader instruction P2rC is not implemented.");
        }

        /// <summary>Placeholder for Pcnt; only logs that the instruction is not implemented.</summary>
        public static void Pcnt(EmitterContext context)
        {
            _ = context.GetOp<InstPcnt>();

            context.Config.GpuAccessor.Log("Shader instruction Pcnt is not implemented.");
        }

        /// <summary>Placeholder for Pexit; only logs that the instruction is not implemented.</summary>
        public static void Pexit(EmitterContext context)
        {
            _ = context.GetOp<InstPexit>();

            context.Config.GpuAccessor.Log("Shader instruction Pexit is not implemented.");
        }

        /// <summary>Placeholder for Pixld; only logs that the instruction is not implemented.</summary>
        public static void Pixld(EmitterContext context)
        {
            _ = context.GetOp<InstPixld>();

            context.Config.GpuAccessor.Log("Shader instruction Pixld is not implemented.");
        }

        /// <summary>Placeholder for Plongjmp; only logs that the instruction is not implemented.</summary>
        public static void Plongjmp(EmitterContext context)
        {
            _ = context.GetOp<InstPlongjmp>();

            context.Config.GpuAccessor.Log("Shader instruction Plongjmp is not implemented.");
        }

        /// <summary>Placeholder for Pret; only logs that the instruction is not implemented.</summary>
        public static void Pret(EmitterContext context)
        {
            _ = context.GetOp<InstPret>();

            context.Config.GpuAccessor.Log("Shader instruction Pret is not implemented.");
        }

        /// <summary>Placeholder for PrmtR; only logs that the instruction is not implemented.</summary>
        public static void PrmtR(EmitterContext context)
        {
            _ = context.GetOp<InstPrmtR>();

            context.Config.GpuAccessor.Log("Shader instruction PrmtR is not implemented.");
        }

        /// <summary>Placeholder for PrmtI; only logs that the instruction is not implemented.</summary>
        public static void PrmtI(EmitterContext context)
        {
            _ = context.GetOp<InstPrmtI>();

            context.Config.GpuAccessor.Log("Shader instruction PrmtI is not implemented.");
        }

        /// <summary>Placeholder for PrmtC; only logs that the instruction is not implemented.</summary>
        public static void PrmtC(EmitterContext context)
        {
            _ = context.GetOp<InstPrmtC>();

            context.Config.GpuAccessor.Log("Shader instruction PrmtC is not implemented.");
        }

        /// <summary>Placeholder for PrmtRc; only logs that the instruction is not implemented.</summary>
        public static void PrmtRc(EmitterContext context)
        {
            _ = context.GetOp<InstPrmtRc>();

            context.Config.GpuAccessor.Log("Shader instruction PrmtRc is not implemented.");
        }

        /// <summary>Placeholder for R2b; only logs that the instruction is not implemented.</summary>
        public static void R2b(EmitterContext context)
        {
            _ = context.GetOp<InstR2b>();

            context.Config.GpuAccessor.Log("Shader instruction R2b is not implemented.");
        }

        /// <summary>Placeholder for Ram; only logs that the instruction is not implemented.</summary>
        public static void Ram(EmitterContext context)
        {
            _ = context.GetOp<InstRam>();

            context.Config.GpuAccessor.Log("Shader instruction Ram is not implemented.");
        }

        /// <summary>Placeholder for Rtt; only logs that the instruction is not implemented.</summary>
        public static void Rtt(EmitterContext context)
        {
            _ = context.GetOp<InstRtt>();

            context.Config.GpuAccessor.Log("Shader instruction Rtt is not implemented.");
        }

        /// <summary>Placeholder for Sam; only logs that the instruction is not implemented.</summary>
        public static void Sam(EmitterContext context)
        {
            _ = context.GetOp<InstSam>();

            context.Config.GpuAccessor.Log("Shader instruction Sam is not implemented.");
        }

        /// <summary>Placeholder for Setcrsptr; only logs that the instruction is not implemented.</summary>
        public static void Setcrsptr(EmitterContext context)
        {
            _ = context.GetOp<InstSetcrsptr>();

            context.Config.GpuAccessor.Log("Shader instruction Setcrsptr is not implemented.");
        }

        /// <summary>Placeholder for Setlmembase; only logs that the instruction is not implemented.</summary>
        public static void Setlmembase(EmitterContext context)
        {
            _ = context.GetOp<InstSetlmembase>();

            context.Config.GpuAccessor.Log("Shader instruction Setlmembase is not implemented.");
        }

        /// <summary>Placeholder for ShfLR; only logs that the instruction is not implemented.</summary>
        public static void ShfLR(EmitterContext context)
        {
            _ = context.GetOp<InstShfLR>();

            context.Config.GpuAccessor.Log("Shader instruction ShfLR is not implemented.");
        }

        /// <summary>Placeholder for ShfRR; only logs that the instruction is not implemented.</summary>
        public static void ShfRR(EmitterContext context)
        {
            _ = context.GetOp<InstShfRR>();

            context.Config.GpuAccessor.Log("Shader instruction ShfRR is not implemented.");
        }

        /// <summary>Placeholder for ShfLI; only logs that the instruction is not implemented.</summary>
        public static void ShfLI(EmitterContext context)
        {
            _ = context.GetOp<InstShfLI>();

            context.Config.GpuAccessor.Log("Shader instruction ShfLI is not implemented.");
        }

        /// <summary>Placeholder for ShfRI; only logs that the instruction is not implemented.</summary>
        public static void ShfRI(EmitterContext context)
        {
            _ = context.GetOp<InstShfRI>();

            context.Config.GpuAccessor.Log("Shader instruction ShfRI is not implemented.");
        }

        /// <summary>Placeholder for St; only logs that the instruction is not implemented.</summary>
        public static void St(EmitterContext context)
        {
            _ = context.GetOp<InstSt>();

            context.Config.GpuAccessor.Log("Shader instruction St is not implemented.");
        }

        /// <summary>Placeholder for Stp; only logs that the instruction is not implemented.</summary>
        public static void Stp(EmitterContext context)
        {
            _ = context.GetOp<InstStp>();

            context.Config.GpuAccessor.Log("Shader instruction Stp is not implemented.");
        }

        /// <summary>Placeholder for Txa; only logs that the instruction is not implemented.</summary>
        public static void Txa(EmitterContext context)
        {
            _ = context.GetOp<InstTxa>();

            context.Config.GpuAccessor.Log("Shader instruction Txa is not implemented.");
        }

        /// <summary>Placeholder for Vabsdiff; only logs that the instruction is not implemented.</summary>
        public static void Vabsdiff(EmitterContext context)
        {
            _ = context.GetOp<InstVabsdiff>();

            context.Config.GpuAccessor.Log("Shader instruction Vabsdiff is not implemented.");
        }

        /// <summary>Placeholder for Vabsdiff4; only logs that the instruction is not implemented.</summary>
        public static void Vabsdiff4(EmitterContext context)
        {
            _ = context.GetOp<InstVabsdiff4>();

            context.Config.GpuAccessor.Log("Shader instruction Vabsdiff4 is not implemented.");
        }

        /// <summary>Placeholder for Vadd; only logs that the instruction is not implemented.</summary>
        public static void Vadd(EmitterContext context)
        {
            _ = context.GetOp<InstVadd>();

            context.Config.GpuAccessor.Log("Shader instruction Vadd is not implemented.");
        }

        /// <summary>Placeholder for Votevtg; only logs that the instruction is not implemented.</summary>
        public static void Votevtg(EmitterContext context)
        {
            _ = context.GetOp<InstVotevtg>();

            context.Config.GpuAccessor.Log("Shader instruction Votevtg is not implemented.");
        }

        /// <summary>Placeholder for Vset; only logs that the instruction is not implemented.</summary>
        public static void Vset(EmitterContext context)
        {
            _ = context.GetOp<InstVset>();

            context.Config.GpuAccessor.Log("Shader instruction Vset is not implemented.");
        }

        /// <summary>Placeholder for Vsetp; only logs that the instruction is not implemented.</summary>
        public static void Vsetp(EmitterContext context)
        {
            _ = context.GetOp<InstVsetp>();

            context.Config.GpuAccessor.Log("Shader instruction Vsetp is not implemented.");
        }

        /// <summary>Placeholder for Vshl; only logs that the instruction is not implemented.</summary>
        public static void Vshl(EmitterContext context)
        {
            _ = context.GetOp<InstVshl>();

            context.Config.GpuAccessor.Log("Shader instruction Vshl is not implemented.");
        }

        /// <summary>Placeholder for Vshr; only logs that the instruction is not implemented.</summary>
        public static void Vshr(EmitterContext context)
        {
            _ = context.GetOp<InstVshr>();

            context.Config.GpuAccessor.Log("Shader instruction Vshr is not implemented.");
        }
    }
}

View file

@ -1,984 +0,0 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits a bitfield extract: takes a run of bits out of source A, using the start
/// position and width that are packed into source B, and writes it to the destination.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Bfe(EmitterContext context)
{
    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    // Opcode bit 40 asks for the input bits to be reversed first, bit 48 for a signed extract.
    bool reverseBits = aluOp.RawOpCode.Extract(40);
    bool signedExtract = aluOp.RawOpCode.Extract(48);

    Operand value = GetSrcA(context);
    Operand packedRange = GetSrcB(context);

    if (reverseBits)
    {
        value = context.BitfieldReverse(value);
    }

    // Source B holds the start position in its low byte and the width in the next byte.
    Operand startBit = context.BitwiseAnd(packedRange, Const(0xff));
    Operand bitCount = context.BitfieldExtractU32(packedRange, Const(8), Const(8));

    Operand extracted;

    if (signedExtract)
    {
        extracted = context.BitfieldExtractS32(value, startBit, bitCount);
    }
    else
    {
        extracted = context.BitfieldExtractU32(value, startBit, bitCount);
    }

    context.Copy(GetDest(context), extracted);

    // TODO: CC, X, corner cases
}
/// <summary>
/// Emits a bitfield insert built from sources A and C, with the start position and width
/// packed into source B, and writes the result to the destination.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Bfi(EmitterContext context)
{
    // The decoded opcode is not read here; the cast is kept so an unexpected opcode type still fails.
    _ = (OpCodeAlu)context.CurrOp;

    Operand insertValue = GetSrcA(context);
    Operand packedRange = GetSrcB(context);
    Operand baseValue = GetSrcC(context);

    // Source B holds the start position in its low byte and the width in the next byte.
    Operand startBit = context.BitwiseAnd(packedRange, Const(0xff));
    Operand bitCount = context.BitfieldExtractU32(packedRange, Const(8), Const(8));

    Operand merged = context.BitfieldInsert(baseValue, insertValue, startBit, bitCount);

    context.Copy(GetDest(context), merged);
}
/// <summary>
/// Emits CSETP. The condition itself is not evaluated yet: it is treated as a constant
/// "true", combined with predicate 39 and written to the two destination predicates.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Csetp(EmitterContext context)
{
    OpCodePset psetOp = (OpCodePset)context.CurrOp;

    // TODO: Implement that properly

    // Stand-in for the real condition, plus its complement for the second destination.
    Operand primary = Const(IrConsts.True);
    Operand complement = context.BitwiseNot(primary);

    Operand combinePred = GetPredicate39(context);

    primary = GetPredLogicalOp(context, psetOp.LogicalOp, primary, combinePred);
    complement = GetPredLogicalOp(context, psetOp.LogicalOp, complement, combinePred);

    context.Copy(Register(psetOp.Predicate3), primary);
    context.Copy(Register(psetOp.Predicate0), complement);
}
/// <summary>
/// Emits FLO: a find-first-set over source B (optionally inverted), in signed or unsigned
/// flavour, with the result optionally XORed with 31 before being written to the destination.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Flo(EmitterContext context)
{
    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    bool invertInput = aluOp.RawOpCode.Extract(40);
    bool flipIndex = aluOp.RawOpCode.Extract(41);
    bool signedSearch = aluOp.RawOpCode.Extract(48);

    Operand input = GetSrcB(context);
    input = context.BitwiseNot(input, invertInput);

    Operand bitIndex;

    if (signedSearch)
    {
        bitIndex = context.FindFirstSetS32(input);
    }
    else
    {
        bitIndex = context.FindFirstSetU32(input);
    }

    if (flipIndex)
    {
        // Opcode bit 41: report the index as 31 - n (done with an XOR against 31).
        bitIndex = context.BitwiseExclusiveOr(bitIndex, Const(31));
    }

    context.Copy(GetDest(context), bitIndex);
}
/// <summary>
/// Emits an integer add of sources A and B with optional per-operand negation, optional
/// carry-in (extended form), condition-code update and destination write.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Iadd(EmitterContext context)
{
    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    bool negA = false;
    bool negB = false;

    if (aluOp is OpCodeAluImm32)
    {
        // TODO: Other IADD32I variant without the negate.
        negA = aluOp.RawOpCode.Extract(56);
    }
    else
    {
        negB = aluOp.RawOpCode.Extract(48);
        negA = aluOp.RawOpCode.Extract(49);
    }

    Operand lhs = GetSrcA(context);
    lhs = context.INegate(lhs, negA);

    Operand rhs = GetSrcB(context);
    rhs = context.INegate(rhs, negB);

    Operand sum = context.IAdd(lhs, rhs);

    if (aluOp.Extended)
    {
        // Extended form: add the carry flag (masked to its low bit) on top of the sum.
        Operand carryIn = context.BitwiseAnd(GetCF(), Const(1));
        sum = context.IAdd(sum, carryIn);
    }

    SetIaddFlags(context, sum, lhs, rhs, aluOp.SetCondCode, aluOp.Extended);

    context.Copy(GetDest(context), sum);
}
/// <summary>
/// Emits a three-input integer add: (A + B), optionally shifted by 16 for the register
/// form, plus C. Each input can be negated and, for the register form, narrowed to one
/// of its 16-bit halves.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Iadd3(EmitterContext context)
{
    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    IntegerHalfPart selC = (IntegerHalfPart)aluOp.RawOpCode.Extract(31, 2);
    IntegerHalfPart selB = (IntegerHalfPart)aluOp.RawOpCode.Extract(33, 2);
    IntegerHalfPart selA = (IntegerHalfPart)aluOp.RawOpCode.Extract(35, 2);

    IntegerShift shiftMode = (IntegerShift)aluOp.RawOpCode.Extract(37, 2);

    bool negC = aluOp.RawOpCode.Extract(49);
    bool negB = aluOp.RawOpCode.Extract(50);
    bool negA = aluOp.RawOpCode.Extract(51);

    // Half selection and the intermediate shift only apply to the register form.
    bool isRegForm = aluOp is OpCodeAluReg;

    // Narrows an input to the selected 16-bit half; other selections pass through unchanged.
    Operand SelectPart(Operand src, IntegerHalfPart part)
    {
        if (!isRegForm)
        {
            return src;
        }

        switch (part)
        {
            case IntegerHalfPart.H0:
                return context.BitwiseAnd(src, Const(0xffff));

            case IntegerHalfPart.H1:
                return context.ShiftRightU32(src, Const(16));

            default:
                // B32 keeps the full value; any other encoding is also passed through.
                // TODO: Warning.
                return src;
        }
    }

    Operand a = context.INegate(SelectPart(GetSrcA(context), selA), negA);
    Operand b = context.INegate(SelectPart(GetSrcB(context), selB), negB);
    Operand c = context.INegate(SelectPart(GetSrcC(context), selC), negC);

    Operand sum = context.IAdd(a, b);

    if (isRegForm)
    {
        switch (shiftMode)
        {
            case IntegerShift.ShiftLeft:
                sum = context.ShiftLeft(sum, Const(16));
                break;

            case IntegerShift.ShiftRight:
                sum = context.ShiftRightU32(sum, Const(16));
                break;

            default:
                // NoShift leaves the partial sum as is; other encodings are ignored.
                // TODO: Warning.
                break;
        }
    }

    sum = context.IAdd(sum, c);

    context.Copy(GetDest(context), sum);

    // TODO: CC, X, corner cases
}
/// <summary>
/// Emits ICMP: compares source C against zero with the encoded condition and writes
/// source A to the destination when the comparison holds, source B otherwise.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Icmp(EmitterContext context)
{
    OpCode currentOp = context.CurrOp;

    bool signedCompare = currentOp.RawOpCode.Extract(48);

    IntegerCondition condition = (IntegerCondition)currentOp.RawOpCode.Extract(49, 3);

    Operand whenTrue = GetSrcA(context);
    Operand whenFalse = GetSrcB(context);
    Operand tested = GetSrcC(context);

    Operand holds = GetIntComparison(context, condition, tested, Const(0), signedCompare);

    Operand selected = context.ConditionalSelect(holds, whenTrue, whenFalse);

    context.Copy(GetDest(context), selected);
}
/// <summary>
/// Emits an integer multiply-add: A * B (low result, or high result when opcode bit 54
/// is set) plus C, written to the destination.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Imad(EmitterContext context)
{
    bool aIsSigned = context.CurrOp.RawOpCode.Extract(48);
    bool bIsSigned = context.CurrOp.RawOpCode.Extract(53);
    bool wantHigh = context.CurrOp.RawOpCode.Extract(54);

    Operand a = GetSrcA(context);
    Operand b = GetSrcB(context);
    Operand addend = GetSrcC(context);

    Operand product;

    if (!wantHigh)
    {
        product = context.IMultiply(a, b);
    }
    else if (aIsSigned && bIsSigned)
    {
        product = context.MultiplyHighS32(a, b);
    }
    else
    {
        product = context.MultiplyHighU32(a, b);

        // Mixed signedness: start from the unsigned high product and add the other
        // operand multiplied by (signed operand >> 31) as a correction term.
        if (aIsSigned)
        {
            Operand signOfA = context.ShiftRightS32(a, Const(31));
            product = context.IAdd(product, context.IMultiply(b, signOfA));
        }
        else if (bIsSigned)
        {
            Operand signOfB = context.ShiftRightS32(b, Const(31));
            product = context.IAdd(product, context.IMultiply(a, signOfB));
        }
    }

    product = context.IAdd(product, addend);

    // TODO: CC, X, SAT, and more?

    context.Copy(GetDest(context), product);
}
/// <summary>
/// Emits an integer min/max: both the minimum and the maximum of A and B are computed
/// and predicate 39 selects which one (minimum when set) is written to the destination.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Imnmx(EmitterContext context)
{
    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    bool signedCompare = aluOp.RawOpCode.Extract(48);

    Operand a = GetSrcA(context);
    Operand b = GetSrcB(context);

    Operand smallest;
    Operand largest;

    if (signedCompare)
    {
        smallest = context.IMinimumS32(a, b);
        largest = context.IMaximumS32(a, b);
    }
    else
    {
        smallest = context.IMinimumU32(a, b);
        largest = context.IMaximumU32(a, b);
    }

    Operand pickMin = GetPredicate39(context);

    Operand chosen = context.ConditionalSelect(pickMin, smallest, largest);

    context.Copy(GetDest(context), chosen);

    SetZnFlags(context, chosen, aluOp.SetCondCode);

    // TODO: X flags.
}
/// <summary>
/// Emits a scaled integer add: (A &lt;&lt; shift) + B with optional negation of either
/// term, condition-code update and destination write.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Iscadd(EmitterContext context)
{
    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    bool negA = false;
    bool negB = false;

    int shiftAmount;

    if (aluOp is OpCodeAluImm32)
    {
        // The 32-bit immediate form carries no negate bits and stores the shift at bit 53.
        shiftAmount = aluOp.RawOpCode.Extract(53, 5);
    }
    else
    {
        negB = aluOp.RawOpCode.Extract(48);
        negA = aluOp.RawOpCode.Extract(49);

        shiftAmount = aluOp.RawOpCode.Extract(39, 5);
    }

    Operand scaled = GetSrcA(context);
    Operand addend = GetSrcB(context);

    // Scale first, then negate, so the negation applies to the shifted value.
    scaled = context.ShiftLeft(scaled, Const(shiftAmount));
    scaled = context.INegate(scaled, negA);
    addend = context.INegate(addend, negB);

    Operand sum = context.IAdd(scaled, addend);

    SetIaddFlags(context, sum, scaled, addend, aluOp.SetCondCode, false);

    context.Copy(GetDest(context), sum);
}
/// <summary>
/// Emits ISET: compares A and B, combines the outcome with predicate 39 and writes it
/// to the destination register, either as the raw comparison value or, when opcode
/// bit 44 is set, as floating-point 1 / integer 0.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Iset(EmitterContext context)
{
    OpCodeSet setOp = (OpCodeSet)context.CurrOp;

    bool floatResult = setOp.RawOpCode.Extract(44);
    bool signedCompare = setOp.RawOpCode.Extract(48);

    IntegerCondition condition = (IntegerCondition)setOp.RawOpCode.Extract(49, 3);

    Operand lhs = GetSrcA(context);
    Operand rhs = GetSrcB(context);

    Operand outcome = GetIntComparison(context, condition, lhs, rhs, signedCompare, setOp.Extended);

    Operand combinePred = GetPredicate39(context);

    outcome = GetPredLogicalOp(context, setOp.LogicalOp, outcome, combinePred);

    Operand destination = GetDest(context);

    if (!floatResult)
    {
        context.Copy(destination, outcome);

        SetZnFlags(context, outcome, setOp.SetCondCode, setOp.Extended);

        return;
    }

    outcome = context.ConditionalSelect(outcome, ConstF(1), Const(0));

    context.Copy(destination, outcome);

    SetFPZnFlags(context, outcome, setOp.SetCondCode);
}
/// <summary>
/// Emits ISETP: compares A and B, then writes the comparison and its complement, each
/// combined with predicate 39, to the two destination predicates.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Isetp(EmitterContext context)
{
    OpCodeSet setOp = (OpCodeSet)context.CurrOp;

    bool signedCompare = setOp.RawOpCode.Extract(48);

    IntegerCondition condition = (IntegerCondition)setOp.RawOpCode.Extract(49, 3);

    Operand lhs = GetSrcA(context);
    Operand rhs = GetSrcB(context);

    Operand primary = GetIntComparison(context, condition, lhs, rhs, signedCompare, setOp.Extended);
    Operand complement = context.BitwiseNot(primary);

    Operand combinePred = GetPredicate39(context);

    primary = GetPredLogicalOp(context, setOp.LogicalOp, primary, combinePred);
    complement = GetPredLogicalOp(context, setOp.LogicalOp, complement, combinePred);

    context.Copy(Register(setOp.Predicate3), primary);
    context.Copy(Register(setOp.Predicate0), complement);
}
/// <summary>
/// Emits LEA: (A &lt;&lt; shift), optionally negated, plus B, written to the destination.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Lea(EmitterContext context)
{
    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    bool negateScaled = aluOp.RawOpCode.Extract(45);

    int shiftAmount = aluOp.RawOpCode.Extract(39, 5);

    Operand scaled = GetSrcA(context);
    Operand baseValue = GetSrcB(context);

    scaled = context.ShiftLeft(scaled, Const(shiftAmount));
    scaled = context.INegate(scaled, negateScaled);

    Operand address = context.IAdd(scaled, baseValue);

    context.Copy(GetDest(context), address);

    // TODO: CC, X
}
/// <summary>
/// Emits LEA.HI: treats (C:A) as a 64-bit value shifted left by the encoded amount,
/// optionally negates it, and writes the upper 32 bits plus B to the destination.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Lea_Hi(EmitterContext context)
{
    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    bool negate;
    int shiftAmount;

    // The register form and the other forms keep the negate bit and shift in different fields.
    if (aluOp is OpCodeAluReg)
    {
        negate = aluOp.RawOpCode.Extract(37);
        shiftAmount = aluOp.RawOpCode.Extract(28, 5);
    }
    else
    {
        negate = aluOp.RawOpCode.Extract(56);
        shiftAmount = aluOp.RawOpCode.Extract(51, 5);
    }

    Operand lowInput = GetSrcA(context);
    Operand addend = GetSrcB(context);
    Operand highInput = GetSrcC(context);

    Operand low = context.ShiftLeft(lowInput, Const(shiftAmount));

    // Bits of the low word that cross into the high word; a zero shift moves none,
    // and a right shift by 32 is avoided by using a constant zero instead.
    Operand high;

    if (shiftAmount == 0)
    {
        high = Const(0);
    }
    else
    {
        high = context.ShiftRightU32(lowInput, Const(32 - shiftAmount));
    }

    high = context.BitwiseOr(high, context.ShiftLeft(highInput, Const(shiftAmount)));

    if (negate)
    {
        // 64-bit negation: invert both halves, add 1 to the low half
        // and propagate the resulting carry into the high half.
        low = context.BitwiseNot(low);
        high = context.BitwiseNot(high);

        low = AddWithCarry(context, low, Const(1), out Operand carryOut);

        high = context.IAdd(high, carryOut);
    }

    Operand result = context.IAdd(high, addend);

    context.Copy(GetDest(context), result);

    // TODO: CC, X
}
/// <summary>
/// Emits LOP: a two-input logical operation (AND, OR or XOR; any other mode passes B
/// through) on optionally inverted sources, with predicate write, destination write and
/// condition-code update.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Lop(EmitterContext context)
{
    IOpCodeLop lopOp = (IOpCodeLop)context.CurrOp;

    Operand a = context.BitwiseNot(GetSrcA(context), lopOp.InvertA);
    Operand b = context.BitwiseNot(GetSrcB(context), lopOp.InvertB);

    // Modes other than AND/OR/XOR fall through and yield B unchanged.
    Operand result = b;

    if (lopOp.LogicalOp == LogicalOperation.And)
    {
        result = context.BitwiseAnd(a, b);
    }
    else if (lopOp.LogicalOp == LogicalOperation.Or)
    {
        result = context.BitwiseOr(a, b);
    }
    else if (lopOp.LogicalOp == LogicalOperation.ExclusiveOr)
    {
        result = context.BitwiseExclusiveOr(a, b);
    }

    ConditionalOperation predWriteMode = (ConditionalOperation)context.CurrOp.RawOpCode.Extract(44, 2);

    EmitLopPredWrite(context, lopOp, result, predWriteMode);

    context.Copy(GetDest(context), result);

    SetZnFlags(context, result, lopOp.SetCondCode, lopOp.Extended);
}
/// <summary>
/// Emits LOP3: a three-input logical operation described by an 8-bit truth table taken
/// from the opcode, with destination write and condition-code update. Only the register
/// form additionally performs a predicate write.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Lop3(EmitterContext context)
{
    IOpCodeLop lopOp = (IOpCodeLop)context.CurrOp;

    Operand a = GetSrcA(context);
    Operand b = GetSrcB(context);
    Operand c = GetSrcC(context);

    bool isRegForm = lopOp is OpCodeLopReg;

    // The truth table sits at a different bit offset depending on the encoding.
    int truthTable;

    if (isRegForm)
    {
        truthTable = lopOp.RawOpCode.Extract(28, 8);
    }
    else
    {
        truthTable = lopOp.RawOpCode.Extract(48, 8);
    }

    Operand result = Lop3Expression.GetFromTruthTable(context, a, b, c, truthTable);

    if (isRegForm)
    {
        ConditionalOperation predWriteMode = (ConditionalOperation)context.CurrOp.RawOpCode.Extract(36, 2);

        EmitLopPredWrite(context, lopOp, result, predWriteMode);
    }

    context.Copy(GetDest(context), result);

    SetZnFlags(context, result, lopOp.SetCondCode, lopOp.Extended);
}
/// <summary>
/// Emits POPC: counts the bits of source B (inverted first when opcode bit 40 is set)
/// and writes the count to the destination.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Popc(EmitterContext context)
{
    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    bool invertInput = aluOp.RawOpCode.Extract(40);

    Operand input = GetSrcB(context);
    input = context.BitwiseNot(input, invertInput);

    Operand bitCount = context.BitCount(input);

    context.Copy(GetDest(context), bitCount);
}
/// <summary>
/// Emits PSET: combines three (optionally inverted) predicates with two logical
/// operations and writes the outcome to the destination register, either as the raw
/// value or, when opcode bit 44 is set, as floating-point 1 / integer 0.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Pset(EmitterContext context)
{
    OpCodePset psetOp = (OpCodePset)context.CurrOp;

    bool floatResult = psetOp.RawOpCode.Extract(44);

    Operand predA = context.BitwiseNot(Register(psetOp.Predicate12), psetOp.InvertA);
    Operand predB = context.BitwiseNot(Register(psetOp.Predicate29), psetOp.InvertB);
    Operand predC = context.BitwiseNot(Register(psetOp.Predicate39), psetOp.InvertP);

    // (A op1 B) op2 C
    Operand outcome = GetPredLogicalOp(context, psetOp.LogicalOpAB, predA, predB);
    outcome = GetPredLogicalOp(context, psetOp.LogicalOp, outcome, predC);

    Operand destination = GetDest(context);

    if (!floatResult)
    {
        context.Copy(destination, outcome);

        return;
    }

    Operand asFloat = context.ConditionalSelect(outcome, ConstF(1), Const(0));

    context.Copy(destination, asFloat);
}
/// <summary>
/// Emits PSETP: combines two (optionally inverted) predicates, then writes that result
/// and its complement, each combined with predicate 39, to the two destination predicates.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Psetp(EmitterContext context)
{
    OpCodePset psetOp = (OpCodePset)context.CurrOp;

    Operand predA = context.BitwiseNot(Register(psetOp.Predicate12), psetOp.InvertA);
    Operand predB = context.BitwiseNot(Register(psetOp.Predicate29), psetOp.InvertB);

    Operand primary = GetPredLogicalOp(context, psetOp.LogicalOpAB, predA, predB);
    Operand complement = context.BitwiseNot(primary);

    Operand combinePred = GetPredicate39(context);

    primary = GetPredLogicalOp(context, psetOp.LogicalOp, primary, combinePred);
    complement = GetPredLogicalOp(context, psetOp.LogicalOp, complement, combinePred);

    context.Copy(Register(psetOp.Predicate3), primary);
    context.Copy(Register(psetOp.Predicate0), complement);
}
/// <summary>
/// Emits RRO (range reduction operator) as a plain move of source B, with the optional
/// absolute-value and negate modifiers applied, into the destination.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Rro(EmitterContext context)
{
    // No actual range reduction is performed: the instruction is expected to
    // always be followed by a matching MUFU instruction, so a simple move is
    // used as its translation.

    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    bool negate = aluOp.RawOpCode.Extract(45);
    bool absolute = aluOp.RawOpCode.Extract(49);

    Operand value = GetSrcB(context);

    value = context.FPAbsNeg(value, absolute, negate);

    context.Copy(GetDest(context), value);
}
/// <summary>
/// Emits a left shift of source A by source B. When opcode bit 39 is set the shift
/// amount is masked to 5 bits; otherwise amounts of 32 or more produce zero.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Shl(EmitterContext context)
{
    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    bool wrapAmount = aluOp.RawOpCode.Extract(39);

    Operand amount = GetSrcB(context);

    if (wrapAmount)
    {
        amount = context.BitwiseAnd(amount, Const(0x1f));
    }

    Operand shifted = context.ShiftLeft(GetSrcA(context), amount);

    if (!wrapAmount)
    {
        // Clamped mode: an (unsigned) amount of 32 or more shifts everything out.
        Operand inRange = context.ICompareLessUnsigned(amount, Const(32));

        shifted = context.ConditionalSelect(inRange, shifted, Const(0));
    }

    // TODO: X, CC

    context.Copy(GetDest(context), shifted);
}
/// <summary>
/// Emits a right shift (signed or unsigned) of source A by source B, with optional bit
/// reversal of A. When opcode bit 39 is set the shift amount is masked to 5 bits;
/// otherwise amounts of 32 or more produce the fully shifted-out value.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Shr(EmitterContext context)
{
    OpCodeAlu aluOp = (OpCodeAlu)context.CurrOp;

    bool wrapAmount = aluOp.RawOpCode.Extract(39);
    bool reverseBits = aluOp.RawOpCode.Extract(40);
    bool signedShift = aluOp.RawOpCode.Extract(48);

    Operand value = GetSrcA(context);
    Operand amount = GetSrcB(context);

    if (reverseBits)
    {
        value = context.BitfieldReverse(value);
    }

    if (wrapAmount)
    {
        amount = context.BitwiseAnd(amount, Const(0x1f));
    }

    Operand shifted;

    if (signedShift)
    {
        shifted = context.ShiftRightS32(value, amount);
    }
    else
    {
        shifted = context.ShiftRightU32(value, amount);
    }

    if (!wrapAmount)
    {
        // Clamped mode: for amounts of 32 or more, a signed shift yields the value
        // shifted right by 31 (the replicated sign), an unsigned shift yields zero.
        Operand overshoot = signedShift
            ? context.ShiftRightS32(value, Const(31))
            : Const(0);

        Operand inRange = context.ICompareLessUnsigned(amount, Const(32));

        shifted = context.ConditionalSelect(inRange, shifted, overshoot);
    }

    // TODO: X, CC

    context.Copy(GetDest(context), shifted);
}
/// <summary>
/// Emits XMAD: multiplies a 16-bit half of source A by a 16-bit half of source B,
/// optionally shifts the product left by 16, adds source C (pre-processed according to
/// the C mode) and, for the extended form, the carry flag. The result is optionally
/// merged with source B before being written to the destination.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and IR builder</param>
public static void Xmad(EmitterContext context)
{
    OpCodeAlu op = (OpCodeAlu)context.CurrOp;

    bool signedA = context.CurrOp.RawOpCode.Extract(48);
    bool signedB = context.CurrOp.RawOpCode.Extract(49);
    bool highA = context.CurrOp.RawOpCode.Extract(53);

    bool isReg = (op is OpCodeAluReg) && !(op is OpCodeAluRegCbuf);
    bool isImm = (op is OpCodeAluImm);

    // The plain register and immediate forms use a 3-bit C mode field, the other forms a 2-bit one.
    XmadCMode mode = isReg || isImm
        ? (XmadCMode)context.CurrOp.RawOpCode.Extract(50, 3)
        : (XmadCMode)context.CurrOp.RawOpCode.Extract(50, 2);

    // The "use high half of B" bit moves between encodings; the immediate form has none.
    bool highB = false;

    if (isReg)
    {
        highB = context.CurrOp.RawOpCode.Extract(35);
    }
    else if (!isImm)
    {
        highB = context.CurrOp.RawOpCode.Extract(52);
    }

    Operand srcA = GetSrcA(context);
    Operand srcB = GetSrcB(context);
    Operand srcC = GetSrcC(context);

    // XMAD immediates are 16-bits unsigned integers.
    if (srcB.Type == OperandType.Constant)
    {
        srcB = Const(srcB.Value & 0xffff);
    }

    // Selects the low or high 16 bits of src and widens them to 32 bits,
    // sign-extending when requested and zero-extending otherwise.
    Operand Extend16To32(Operand src, bool high, bool signed)
    {
        if (signed && high)
        {
            return context.ShiftRightS32(src, Const(16));
        }
        else if (signed)
        {
            return context.BitfieldExtractS32(src, Const(0), Const(16));
        }
        else if (high)
        {
            return context.ShiftRightU32(src, Const(16));
        }
        else
        {
            return context.BitwiseAnd(src, Const(0xffff));
        }
    }

    srcA = Extend16To32(srcA, highA, signedA);
    srcB = Extend16To32(srcB, highB, signedB);

    // Product-shift and merge bits also depend on the encoding; the
    // register + constant buffer form has neither.
    bool productShiftLeft = false;
    bool merge = false;

    if (op is OpCodeAluCbuf)
    {
        productShiftLeft = context.CurrOp.RawOpCode.Extract(55);
        merge = context.CurrOp.RawOpCode.Extract(56);
    }
    else if (!(op is OpCodeAluRegCbuf))
    {
        productShiftLeft = context.CurrOp.RawOpCode.Extract(36);
        merge = context.CurrOp.RawOpCode.Extract(37);
    }

    bool extended;

    if ((op is OpCodeAluReg) || (op is OpCodeAluImm))
    {
        extended = context.CurrOp.RawOpCode.Extract(38);
    }
    else
    {
        extended = context.CurrOp.RawOpCode.Extract(54);
    }

    Operand res = context.IMultiply(srcA, srcB);

    if (productShiftLeft)
    {
        res = context.ShiftLeft(res, Const(16));
    }

    // Pre-process the addend according to the C mode.
    switch (mode)
    {
        case XmadCMode.Cfull: break;

        case XmadCMode.Clo: srcC = Extend16To32(srcC, high: false, signed: false); break;
        case XmadCMode.Chi: srcC = Extend16To32(srcC, high: true, signed: false); break;

        case XmadCMode.Cbcc:
        {
            // Uses the original (not half-extended) source B, shifted into the upper half.
            srcC = context.IAdd(srcC, context.ShiftLeft(GetSrcB(context), Const(16)));
            break;
        }

        case XmadCMode.Csfu:
        {
            Operand signAdjustA = context.ShiftLeft(context.ShiftRightU32(srcA, Const(31)), Const(16));
            Operand signAdjustB = context.ShiftLeft(context.ShiftRightU32(srcB, Const(31)), Const(16));

            srcC = context.ISubtract(srcC, context.IAdd(signAdjustA, signAdjustB));
            break;
        }

        default: /* TODO: Warning */ break;
    }

    Operand product = res;

    // The addend is always accumulated. Previously the extended path added only the
    // carry and dropped srcC, even though the flags below are computed for
    // "product + srcC (+ carry)".
    res = context.IAdd(res, srcC);

    if (extended)
    {
        // Add with carry: the carry flag (masked to its low bit) goes on top of the sum,
        // the same way Iadd handles its extended form.
        res = context.IAdd(res, context.BitwiseAnd(GetCF(), Const(1)));
    }

    SetIaddFlags(context, res, product, srcC, op.SetCondCode, extended);

    if (merge)
    {
        // Keep the low 16 bits of the result and take the upper 16 bits from source B.
        res = context.BitwiseAnd(res, Const(0xffff));
        res = context.BitwiseOr(res, context.ShiftLeft(GetSrcB(context), Const(16)));
    }

    context.Copy(GetDest(context), res);
}
/// <summary>
/// Emits an integer comparison, dispatching to the extended variant when requested.
/// </summary>
/// <param name="context">Emitter context providing the IR builder</param>
/// <param name="cond">Comparison condition</param>
/// <param name="srcA">Left-hand operand</param>
/// <param name="srcB">Right-hand operand</param>
/// <param name="isSigned">True for a signed comparison, false for an unsigned one</param>
/// <param name="extended">True to use <c>GetIntComparisonExtended</c> instead of the plain comparison</param>
/// <returns>Operand holding the comparison result</returns>
private static Operand GetIntComparison(
    EmitterContext context,
    IntegerCondition cond,
    Operand srcA,
    Operand srcB,
    bool isSigned,
    bool extended)
{
    if (extended)
    {
        return GetIntComparisonExtended(context, cond, srcA, srcB, isSigned);
    }

    return GetIntComparison(context, cond, srcA, srcB, isSigned);
}
/// <summary>
/// Emits an "extended" integer comparison that combines a comparison of
/// <paramref name="srcA"/> and <paramref name="srcB"/> with the current carry (CF) and zero (ZF) flags.
/// Per the formulas on each case, the sources play the role of the high words (xh, yh)
/// while the flags carry the outcome for the low words (xl, yl).
/// </summary>
/// <param name="context">Emitter context the IR operations are added to.</param>
/// <param name="cond">Condition to evaluate; Always/Never yield constant true/false.</param>
/// <param name="srcA">First operand.</param>
/// <param name="srcB">Second operand.</param>
/// <param name="isSigned">Selects signed instead of unsigned ordering comparisons.</param>
/// <returns>Operand holding the comparison result.</returns>
/// <exception cref="InvalidOperationException">The condition is not one of the handled values.</exception>
private static Operand GetIntComparisonExtended(
    EmitterContext context,
    IntegerCondition cond,
    Operand srcA,
    Operand srcB,
    bool isSigned)
{
    Operand res;
    if (cond == IntegerCondition.Always)
    {
        res = Const(IrConsts.True);
    }
    else if (cond == IntegerCondition.Never)
    {
        res = Const(IrConsts.False);
    }
    else
    {
        // NOTE(review): this value is overwritten by every case below (or the method throws);
        // the operations are kept so the emitted IR sequence stays unchanged.
        res = context.ISubtract(srcA, srcB);
        res = context.IAdd(res, context.BitwiseNot(GetCF()));
        switch (cond)
        {
            case Decoders.IntegerCondition.Equal: // r = xh == yh && xl == yl
                res = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetZF());
                break;
            case Decoders.IntegerCondition.Less: // r = xh < yh || (xh == yh && xl < yl)
                Operand notC = context.BitwiseNot(GetCF());
                Operand prevLt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notC);
                res = isSigned
                    ? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLt)
                    : context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLt);
                break;
            case Decoders.IntegerCondition.LessOrEqual: // r = xh < yh || (xh == yh && xl <= yl)
                Operand zOrNotC = context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF()));
                Operand prevLe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), zOrNotC);
                res = isSigned
                    ? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLe)
                    : context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLe);
                break;
            case Decoders.IntegerCondition.Greater: // r = xh > yh || (xh == yh && xl > yl)
                Operand notZAndC = context.BitwiseAnd(context.BitwiseNot(GetZF()), GetCF());
                Operand prevGt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notZAndC);
                res = isSigned
                    ? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGt)
                    : context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGt);
                break;
            case Decoders.IntegerCondition.GreaterOrEqual: // r = xh > yh || (xh == yh && xl >= yl)
                Operand prevGe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetCF());
                res = isSigned
                    ? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGe)
                    : context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGe);
                break;
            case Decoders.IntegerCondition.NotEqual: // r = xh != yh || xl != yl
                // The result must be stored in res; otherwise the scratch value computed
                // above would be returned instead of the comparison.
                res = context.BitwiseOr(context.ICompareNotEqual(srcA, srcB), context.BitwiseNot(GetZF()));
                break;
            default:
                throw new InvalidOperationException($"Unexpected condition \"{cond}\".");
        }
    }
    return res;
}
/// <summary>
/// Emits a plain integer comparison of <paramref name="srcA"/> against <paramref name="srcB"/>.
/// Always/Never produce constant true/false; ordering conditions use the signed instruction
/// when <paramref name="isSigned"/> is set and the U32 instruction otherwise.
/// </summary>
/// <returns>Operand holding the comparison result.</returns>
/// <exception cref="InvalidOperationException">The condition is not one of the handled values.</exception>
private static Operand GetIntComparison(
    EmitterContext context,
    IntegerCondition cond,
    Operand srcA,
    Operand srcB,
    bool isSigned)
{
    if (cond == IntegerCondition.Always)
    {
        return Const(IrConsts.True);
    }

    if (cond == IntegerCondition.Never)
    {
        return Const(IrConsts.False);
    }

    Instruction inst;

    switch (cond)
    {
        case IntegerCondition.Less:
            inst = isSigned ? Instruction.CompareLess : Instruction.CompareLessU32;
            break;
        case IntegerCondition.Equal:
            inst = Instruction.CompareEqual;
            break;
        case IntegerCondition.LessOrEqual:
            inst = isSigned ? Instruction.CompareLessOrEqual : Instruction.CompareLessOrEqualU32;
            break;
        case IntegerCondition.Greater:
            inst = isSigned ? Instruction.CompareGreater : Instruction.CompareGreaterU32;
            break;
        case IntegerCondition.NotEqual:
            inst = Instruction.CompareNotEqual;
            break;
        case IntegerCondition.GreaterOrEqual:
            inst = isSigned ? Instruction.CompareGreaterOrEqual : Instruction.CompareGreaterOrEqualU32;
            break;
        default:
            throw new InvalidOperationException($"Unexpected condition \"{cond}\".");
    }

    return context.Add(inst, Local(), srcA, srcB);
}
/// <summary>
/// Writes the predicate result of a logical operation. Nothing is emitted unless
/// <paramref name="op"/> is an <c>OpCodeLop</c> whose Predicate48 is not PT.
/// </summary>
/// <param name="result">Result of the logical operation, tested against zero for Zero/NotZero.</param>
/// <param name="condOp">Selects constant false/true or a zero / non-zero test of the result.</param>
private static void EmitLopPredWrite(EmitterContext context, IOpCodeLop op, Operand result, ConditionalOperation condOp)
{
    if (!(op is OpCodeLop lop) || lop.Predicate48.IsPT)
    {
        return;
    }

    Operand predValue;

    switch (condOp)
    {
        case ConditionalOperation.False:
            predValue = Const(IrConsts.False);
            break;
        case ConditionalOperation.True:
            predValue = Const(IrConsts.True);
            break;
        case ConditionalOperation.Zero:
            predValue = context.ICompareEqual(result, Const(0));
            break;
        default: // Any remaining value is handled as ConditionalOperation.NotZero.
            predValue = context.ICompareNotEqual(result, Const(0));
            break;
    }

    context.Copy(Register(lop.Predicate48), predValue);
}
/// <summary>
/// Updates the carry (CF), overflow (VF) and zero/negative flags for an integer addition
/// whose result is <paramref name="res"/>. Does nothing when <paramref name="setCC"/> is false.
/// </summary>
/// <param name="extended">When set, the previous carry flag takes part in the new carry value.</param>
private static void SetIaddFlags(
    EmitterContext context,
    Operand res,
    Operand srcA,
    Operand srcB,
    bool setCC,
    bool extended)
{
    if (setCC)
    {
        if (!extended)
        {
            // C = d < a
            context.Copy(GetCF(), context.ICompareLessUnsigned(res, srcA));
        }
        else
        {
            // C = (d == a && CIn) || d < a
            Operand equalToA = context.ICompareEqual(res, srcA);
            Operand belowA = context.ICompareLessUnsigned(res, srcA);
            Operand equalWithCarryIn = context.BitwiseAnd(equalToA, GetCF());

            context.Copy(GetCF(), context.BitwiseOr(equalWithCarryIn, belowA));
        }

        // V = (d ^ a) & ~(a ^ b) < 0
        Operand overflowBits = context.BitwiseAnd(
            context.BitwiseExclusiveOr(res, srcA),
            context.BitwiseNot(context.BitwiseExclusiveOr(srcA, srcB)));

        context.Copy(GetVF(), context.ICompareLess(overflowBits, Const(0)));

        SetZnFlags(context, res, setCC: true, extended: extended);
    }
}
}
}

View file

@ -10,50 +10,39 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
static class InstEmitAluHelper
{
public static long GetIntMin(IntegerType type)
public static long GetIntMin(IDstFmt type)
{
switch (type)
return type switch
{
case IntegerType.U8: return byte.MinValue;
case IntegerType.S8: return sbyte.MinValue;
case IntegerType.U16: return ushort.MinValue;
case IntegerType.S16: return short.MinValue;
case IntegerType.U32: return uint.MinValue;
case IntegerType.S32: return int.MinValue;
}
throw new ArgumentException($"The type \"{type}\" is not a supported int type.");
IDstFmt.U16 => ushort.MinValue,
IDstFmt.S16 => short.MinValue,
IDstFmt.U32 => uint.MinValue,
IDstFmt.S32 => int.MinValue,
_ => throw new ArgumentException($"The type \"{type}\" is not a supported integer type.")
};
}
public static long GetIntMax(IntegerType type)
public static long GetIntMax(IDstFmt type)
{
switch (type)
return type switch
{
case IntegerType.U8: return byte.MaxValue;
case IntegerType.S8: return sbyte.MaxValue;
case IntegerType.U16: return ushort.MaxValue;
case IntegerType.S16: return short.MaxValue;
case IntegerType.U32: return uint.MaxValue;
case IntegerType.S32: return int.MaxValue;
}
throw new ArgumentException($"The type \"{type}\" is not a supported int type.");
IDstFmt.U16 => ushort.MaxValue,
IDstFmt.S16 => short.MaxValue,
IDstFmt.U32 => uint.MaxValue,
IDstFmt.S32 => int.MaxValue,
_ => throw new ArgumentException($"The type \"{type}\" is not a supported integer type.")
};
}
public static Operand GetPredLogicalOp(
EmitterContext context,
LogicalOperation logicalOp,
Operand input,
Operand pred)
public static Operand GetPredLogicalOp(EmitterContext context, BoolOp logicOp, Operand input, Operand pred)
{
switch (logicalOp)
return logicOp switch
{
case LogicalOperation.And: return context.BitwiseAnd (input, pred);
case LogicalOperation.Or: return context.BitwiseOr (input, pred);
case LogicalOperation.ExclusiveOr: return context.BitwiseExclusiveOr(input, pred);
}
return input;
BoolOp.And => context.BitwiseAnd(input, pred),
BoolOp.Or => context.BitwiseOr(input, pred),
BoolOp.Xor => context.BitwiseExclusiveOr(input, pred),
_ => input
};
}
public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false)

View file

@ -0,0 +1,190 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        // AL2P: writes SrcA + Imm11 to the destination register.
        public static void Al2p(EmitterContext context)
        {
            InstAl2p op = context.GetOp<InstAl2p>();

            Operand dest = GetDest(op.Dest);

            context.Copy(dest, context.IAdd(GetSrcReg(context, op.SrcA), Const(op.Imm11)));
        }

        // ALD: loads AlSize + 1 consecutive attribute words into consecutive registers starting at Dest,
        // stopping early once the destination register is RZ.
        public static void Ald(EmitterContext context)
        {
            InstAld op = context.GetOp<InstAld>();

            Operand primVertex = context.Copy(GetSrcReg(context, op.SrcB));

            int count = (int)op.AlSize + 1;

            for (int i = 0; i < count; i++)
            {
                Register rd = new Register(op.Dest + i, RegisterType.Gpr);

                if (rd.IsRZ)
                {
                    break;
                }

                if (op.Phys)
                {
                    // Indexed access: the attribute index is (SrcA - UserAttributeBase) >> 2.
                    Operand relativeOffset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
                    Operand attrIndex = context.ShiftRightU32(relativeOffset, Const(2));

                    context.Copy(Register(rd), context.LoadAttribute(Const(AttributeConsts.UserAttributeBase), attrIndex, primVertex));

                    context.Config.SetUsedFeature(FeatureFlags.IaIndexing);

                    continue;
                }

                var offset = op.Imm11 + i * 4;

                if (op.SrcB == RegisterConsts.RegisterZeroIndex)
                {
                    Operand attr = Attribute(offset);

                    context.FlagAttributeRead(attr.Value);
                    context.Copy(Register(rd), attr);
                }
                else
                {
                    Operand offsetConst = Const(offset);

                    context.FlagAttributeRead(offsetConst.Value);
                    context.Copy(Register(rd), context.LoadAttribute(offsetConst, Const(0), primVertex));
                }
            }
        }

        // AST: stores AlSize + 1 consecutive registers starting at SrcB into consecutive attribute words,
        // stopping once the register index would go past RegisterZeroIndex.
        public static void Ast(EmitterContext context)
        {
            InstAst op = context.GetOp<InstAst>();

            int count = (int)op.AlSize + 1;

            for (int i = 0; i < count && op.SrcB + i <= RegisterConsts.RegisterZeroIndex; i++)
            {
                Register rd = new Register(op.SrcB + i, RegisterType.Gpr);

                if (!op.Phys)
                {
                    Operand attr = Attribute(op.Imm11 + i * 4);

                    context.FlagAttributeWritten(attr.Value);
                    context.Copy(attr, Register(rd));
                }
                else
                {
                    // Indexed access: the attribute index is (SrcA - UserAttributeBase) >> 2.
                    Operand relativeOffset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
                    Operand attrIndex = context.ShiftRightU32(relativeOffset, Const(2));

                    context.StoreAttribute(Const(AttributeConsts.UserAttributeBase), attrIndex, Register(rd));

                    context.Config.SetUsedFeature(FeatureFlags.OaIndexing);
                }
            }
        }

        // IPA: reads an input attribute (directly or indexed), optionally multiplies it by SrcB,
        // applies saturation and writes it to Dest.
        public static void Ipa(EmitterContext context)
        {
            InstIpa op = context.GetOp<InstIpa>();

            context.FlagAttributeRead(op.Imm10);

            Operand value;

            if (!op.Idx)
            {
                value = Attribute(op.Imm10);

                bool isUserAttribute =
                    op.Imm10 >= AttributeConsts.UserAttributeBase &&
                    op.Imm10 < AttributeConsts.UserAttributeEnd;

                if (isUserAttribute)
                {
                    int location = (op.Imm10 - AttributeConsts.UserAttributeBase) >> 4;

                    // Attributes whose first used interpolation type is perspective get multiplied by PositionW.
                    if (context.Config.ImapTypes[location].GetFirstUsedType() == PixelImap.Perspective)
                    {
                        value = context.FPMultiply(value, Attribute(AttributeConsts.PositionW));
                    }
                }
            }
            else
            {
                Operand relativeOffset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
                Operand attrIndex = context.ShiftRightU32(relativeOffset, Const(2));

                value = context.LoadAttribute(Const(AttributeConsts.UserAttributeBase), attrIndex, Const(0));
                value = context.FPMultiply(value, Attribute(AttributeConsts.PositionW));

                context.Config.SetUsedFeature(FeatureFlags.IaIndexing);
            }

            if (op.IpaOp == IpaOp.Multiply)
            {
                value = context.FPMultiply(value, GetSrcReg(context, op.SrcB));
            }

            value = context.FPSaturate(value, op.Sat);

            context.Copy(GetDest(op.Dest), value);
        }

        public static void Isberd(EmitterContext context)
        {
            InstIsberd op = context.GetOp<InstIsberd>();

            // This instruction performs a load from ISBE memory,
            // however it seems to be only used to get some vertex
            // input data, so we instead propagate the offset so that
            // it can be used on the attribute load.
            Operand dest = GetDest(op.Dest);

            context.Copy(dest, GetSrcReg(context, op.SrcA));
        }

        // OUT (register form): emits a vertex and/or ends the primitive according to OutType.
        public static void OutR(EmitterContext context)
        {
            InstOutR op = context.GetOp<InstOutR>();

            bool emit = op.OutType.HasFlag(OutType.Emit);
            bool cut = op.OutType.HasFlag(OutType.Cut);

            EmitOut(context, emit, cut);
        }

        // OUT (immediate form): emits a vertex and/or ends the primitive according to OutType.
        public static void OutI(EmitterContext context)
        {
            InstOutI op = context.GetOp<InstOutI>();

            bool emit = op.OutType.HasFlag(OutType.Emit);
            bool cut = op.OutType.HasFlag(OutType.Cut);

            EmitOut(context, emit, cut);
        }

        // OUT (constant buffer form): emits a vertex and/or ends the primitive according to OutType.
        public static void OutC(EmitterContext context)
        {
            InstOutC op = context.GetOp<InstOutC>();

            bool emit = op.OutType.HasFlag(OutType.Emit);
            bool cut = op.OutType.HasFlag(OutType.Cut);

            EmitOut(context, emit, cut);
        }

        // Shared OUT implementation; an encoding requesting neither action is logged as invalid.
        private static void EmitOut(EmitterContext context, bool emit, bool cut)
        {
            if (!emit && !cut)
            {
                context.Config.GpuAccessor.Log("Invalid OUT encoding.");
            }

            if (emit)
            {
                context.EmitVertex();
            }

            if (cut)
            {
                context.EndPrimitive();
            }
        }
    }
}

View file

@ -0,0 +1,44 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        // BAR: only the Sync mode is emitted (as a barrier); any other mode is logged.
        public static void Bar(EmitterContext context)
        {
            InstBar op = context.GetOp<InstBar>();

            // TODO: Support other modes.
            if (op.BarOp != BarOp.Sync)
            {
                context.Config.GpuAccessor.Log($"Invalid barrier mode: {op.BarOp}.");

                return;
            }

            context.Barrier();
        }

        // DEPBAR: decoded but intentionally emits nothing.
        public static void Depbar(EmitterContext context)
        {
            _ = context.GetOp<InstDepbar>();

            // No operation.
        }

        // MEMBAR: the Cta level maps to a group memory barrier, every other level to a memory barrier.
        public static void Membar(EmitterContext context)
        {
            InstMembar op = context.GetOp<InstMembar>();

            if (op.Membar != Decoders.Membar.Cta)
            {
                context.MemoryBarrier();
            }
            else
            {
                context.GroupMemoryBarrier();
            }
        }
    }
}

View file

@ -0,0 +1,190 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        // BFE with a register second operand.
        public static void BfeR(EmitterContext context)
        {
            InstBfeR op = context.GetOp<InstBfeR>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcReg(context, op.SrcB);

            EmitBfe(context, srcA, srcB, op.Dest, op.Brev, op.Signed);
        }

        // BFE with an immediate second operand.
        public static void BfeI(EmitterContext context)
        {
            InstBfeI op = context.GetOp<InstBfeI>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitBfe(context, srcA, srcB, op.Dest, op.Brev, op.Signed);
        }

        // BFE with a constant buffer second operand.
        public static void BfeC(EmitterContext context)
        {
            InstBfeC op = context.GetOp<InstBfeC>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitBfe(context, srcA, srcB, op.Dest, op.Brev, op.Signed);
        }

        // BFI with register operands.
        public static void BfiR(EmitterContext context)
        {
            InstBfiR op = context.GetOp<InstBfiR>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcReg(context, op.SrcB);
            Operand srcC = GetSrcReg(context, op.SrcC);

            EmitBfi(context, srcA, srcB, srcC, op.Dest);
        }

        // BFI with an immediate second operand.
        public static void BfiI(EmitterContext context)
        {
            InstBfiI op = context.GetOp<InstBfiI>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            Operand srcC = GetSrcReg(context, op.SrcC);

            EmitBfi(context, srcA, srcB, srcC, op.Dest);
        }

        // BFI with a constant buffer second operand.
        public static void BfiC(EmitterContext context)
        {
            InstBfiC op = context.GetOp<InstBfiC>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            Operand srcC = GetSrcReg(context, op.SrcC);

            EmitBfi(context, srcA, srcB, srcC, op.Dest);
        }

        // BFI where the second operand comes from the SrcC register and the third from a constant buffer.
        public static void BfiRc(EmitterContext context)
        {
            InstBfiRc op = context.GetOp<InstBfiRc>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcReg(context, op.SrcC);
            Operand srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitBfi(context, srcA, srcB, srcC, op.Dest);
        }

        // FLO with a register operand.
        public static void FloR(EmitterContext context)
        {
            InstFloR op = context.GetOp<InstFloR>();

            Operand src = GetSrcReg(context, op.SrcB);

            EmitFlo(context, src, op.Dest, op.NegB, op.Sh, op.Signed);
        }

        // FLO with an immediate operand.
        public static void FloI(EmitterContext context)
        {
            InstFloI op = context.GetOp<InstFloI>();

            Operand src = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitFlo(context, src, op.Dest, op.NegB, op.Sh, op.Signed);
        }

        // FLO with a constant buffer operand.
        public static void FloC(EmitterContext context)
        {
            InstFloC op = context.GetOp<InstFloC>();

            Operand src = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitFlo(context, src, op.Dest, op.NegB, op.Sh, op.Signed);
        }

        // POPC with a register operand.
        public static void PopcR(EmitterContext context)
        {
            InstPopcR op = context.GetOp<InstPopcR>();

            Operand src = GetSrcReg(context, op.SrcB);

            EmitPopc(context, src, op.Dest, op.NegB);
        }

        // POPC with an immediate operand.
        public static void PopcI(EmitterContext context)
        {
            InstPopcI op = context.GetOp<InstPopcI>();

            Operand src = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitPopc(context, src, op.Dest, op.NegB);
        }

        // POPC with a constant buffer operand.
        public static void PopcC(EmitterContext context)
        {
            InstPopcC op = context.GetOp<InstPopcC>();

            Operand src = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitPopc(context, src, op.Dest, op.NegB);
        }

        // Bitfield extract: srcB supplies the position in its low 8 bits and the size in bits 8..15.
        // The source is bit-reversed first when requested.
        private static void EmitBfe(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            int rd,
            bool bitReverse,
            bool isSigned)
        {
            Operand value = bitReverse ? context.BitfieldReverse(srcA) : srcA;

            Operand position = context.BitwiseAnd(srcB, Const(0xff));
            Operand size = context.BitfieldExtractU32(srcB, Const(8), Const(8));

            Operand extracted;

            if (isSigned)
            {
                extracted = context.BitfieldExtractS32(value, position, size);
            }
            else
            {
                extracted = context.BitfieldExtractU32(value, position, size);
            }

            context.Copy(GetDest(rd), extracted);

            // TODO: CC, X, corner cases.
        }

        // Bitfield insert of srcA into srcC; srcB supplies the position (low 8 bits) and size (bits 8..15).
        private static void EmitBfi(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int rd)
        {
            Operand position = context.BitwiseAnd(srcB, Const(0xff));
            Operand size = context.BitfieldExtractU32(srcB, Const(8), Const(8));

            Operand inserted = context.BitfieldInsert(srcC, srcA, position, size);

            context.Copy(GetDest(rd), inserted);
        }

        // Find-first-set on the (optionally inverted) source; with sh set the result is XORed with 31.
        private static void EmitFlo(EmitterContext context, Operand src, int rd, bool invert, bool sh, bool isSigned)
        {
            Operand value = context.BitwiseNot(src, invert);

            Operand found;

            if (isSigned)
            {
                found = context.FindFirstSetS32(value);
            }
            else
            {
                found = context.FindFirstSetU32(value);
            }

            if (sh)
            {
                found = context.BitwiseExclusiveOr(found, Const(31));
            }

            context.Copy(GetDest(rd), found);
        }

        // Bit count of the (optionally inverted) source.
        private static void EmitPopc(EmitterContext context, Operand src, int rd, bool invert)
        {
            Operand value = context.BitwiseNot(src, invert);

            context.Copy(GetDest(rd), context.BitCount(value));
        }
    }
}

View file

@ -1,253 +1,400 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void F2F(EmitterContext context)
public static void F2fR(EmitterContext context)
{
OpCodeFArith op = (OpCodeFArith)context.CurrOp;
InstF2fR op = context.GetOp<InstF2fR>();
FPType dstType = (FPType)op.RawOpCode.Extract(8, 2);
FPType srcType = (FPType)op.RawOpCode.Extract(10, 2);
var src = UnpackReg(context, op.SrcFmt, op.Sh, op.SrcB);
bool round = op.RawOpCode.Extract(42);
bool negateB = op.RawOpCode.Extract(45);
bool absoluteB = op.RawOpCode.Extract(49);
EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
}
Operand srcB = context.FPAbsNeg(GetSrcB(context, srcType), absoluteB, negateB, srcType.ToInstFPType());
public static void F2fI(EmitterContext context)
{
InstF2fI op = context.GetOp<InstF2fI>();
if (round && srcType == dstType)
var src = UnpackImm(context, op.SrcFmt, op.Sh, Imm20ToFloat(op.Imm20));
EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
}
// F2F with the source unpacked from a constant buffer.
public static void F2fC(EmitterContext context)
{
    InstF2fC op = context.GetOp<InstF2fC>();

    Operand src = UnpackCbuf(context, op.SrcFmt, op.Sh, op.CbufSlot, op.CbufOffset);

    EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
}
// F2I with the source unpacked from a register.
public static void F2iR(EmitterContext context)
{
    InstF2iR op = context.GetOp<InstF2iR>();

    Operand src = UnpackReg(context, op.SrcFmt, op.Sh, op.SrcB);

    EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
}
// F2I with the source unpacked from a 20-bit float immediate.
public static void F2iI(EmitterContext context)
{
    InstF2iI op = context.GetOp<InstF2iI>();

    Operand src = UnpackImm(context, op.SrcFmt, op.Sh, Imm20ToFloat(op.Imm20));

    EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
}
// F2I with the source unpacked from a constant buffer.
public static void F2iC(EmitterContext context)
{
    InstF2iC op = context.GetOp<InstF2iC>();

    Operand src = UnpackCbuf(context, op.SrcFmt, op.Sh, op.CbufSlot, op.CbufOffset);

    EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
}
// I2F with a register source.
public static void I2fR(EmitterContext context)
{
    InstI2fR op = context.GetOp<InstI2fR>();

    Operand src = GetSrcReg(context, op.SrcB);

    EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
}
// I2F with a 20-bit signed immediate source.
public static void I2fI(EmitterContext context)
{
    InstI2fI op = context.GetOp<InstI2fI>();

    Operand src = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
}
// I2F with a constant buffer source.
public static void I2fC(EmitterContext context)
{
    InstI2fC op = context.GetOp<InstI2fC>();

    Operand src = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
}
// I2I with a register source.
public static void I2iR(EmitterContext context)
{
    InstI2iR op = context.GetOp<InstI2iR>();

    Operand src = GetSrcReg(context, op.SrcB);

    EmitI2I(context, op.SrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat);
}
// I2I with a 20-bit signed immediate source.
public static void I2iI(EmitterContext context)
{
    InstI2iI op = context.GetOp<InstI2iI>();

    Operand src = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitI2I(context, op.SrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat);
}
// I2I with a constant buffer source.
public static void I2iC(EmitterContext context)
{
    InstI2iC op = context.GetOp<InstI2iC>();

    Operand src = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitI2I(context, op.SrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat);
}
private static void EmitF2F(
EmitterContext context,
DstFmt srcType,
DstFmt dstType,
IntegerRound roundingMode,
Operand src,
int rd,
bool absolute,
bool negate,
bool saturate)
{
Operand srcB = context.FPAbsNeg(src, absolute, negate, srcType.ToInstFPType());
if (srcType == dstType)
{
switch (op.RoundingMode)
srcB = roundingMode switch
{
case RoundingMode.ToNearest:
srcB = context.FPRound(srcB, srcType.ToInstFPType());
break;
case RoundingMode.TowardsNegativeInfinity:
srcB = context.FPFloor(srcB, srcType.ToInstFPType());
break;
case RoundingMode.TowardsPositiveInfinity:
srcB = context.FPCeiling(srcB, srcType.ToInstFPType());
break;
case RoundingMode.TowardsZero:
srcB = context.FPTruncate(srcB, srcType.ToInstFPType());
break;
}
IntegerRound.Round => context.FPRound(srcB, srcType.ToInstFPType()),
IntegerRound.Floor => context.FPFloor(srcB, srcType.ToInstFPType()),
IntegerRound.Ceil => context.FPCeiling(srcB, srcType.ToInstFPType()),
IntegerRound.Trunc => context.FPTruncate(srcB, srcType.ToInstFPType()),
_ => srcB
};
}
// We don't need to handle conversions between FP16 <-> FP32
// since we do FP16 operations as FP32 directly.
// FP16 <-> FP64 conversions are invalid.
if (srcType == FPType.FP32 && dstType == FPType.FP64)
if (srcType == DstFmt.F32 && dstType == DstFmt.F64)
{
srcB = context.FP32ConvertToFP64(srcB);
}
else if (srcType == FPType.FP64 && dstType == FPType.FP32)
else if (srcType == DstFmt.F64 && dstType == DstFmt.F32)
{
srcB = context.FP64ConvertToFP32(srcB);
}
srcB = context.FPSaturate(srcB, op.Saturate, dstType.ToInstFPType());
srcB = context.FPSaturate(srcB, saturate, dstType.ToInstFPType());
WriteFP(context, dstType, srcB);
WriteFP(context, dstType, srcB, rd);
// TODO: CC.
}
public static void F2I(EmitterContext context)
private static void EmitF2I(
EmitterContext context,
DstFmt srcType,
IDstFmt dstType,
RoundMode2 roundingMode,
Operand src,
int rd,
bool absolute,
bool negate)
{
OpCodeFArith op = (OpCodeFArith)context.CurrOp;
IntegerType intType = (IntegerType)op.RawOpCode.Extract(8, 2);
if (intType == IntegerType.U64)
if (dstType == IDstFmt.U64)
{
context.Config.GpuAccessor.Log("Unimplemented 64-bits F2I.");
return;
}
bool isSmallInt = intType <= IntegerType.U16;
bool isSignedInt = dstType == IDstFmt.S16 || dstType == IDstFmt.S32 || dstType == IDstFmt.S64;
bool isSmallInt = dstType == IDstFmt.U16 || dstType == IDstFmt.S16;
FPType floatType = (FPType)op.RawOpCode.Extract(10, 2);
Operand srcB = context.FPAbsNeg(src, absolute, negate);
bool isSignedInt = op.RawOpCode.Extract(12);
bool negateB = op.RawOpCode.Extract(45);
bool absoluteB = op.RawOpCode.Extract(49);
if (isSignedInt)
srcB = roundingMode switch
{
intType |= IntegerType.S8;
}
Operand srcB = context.FPAbsNeg(GetSrcB(context, floatType), absoluteB, negateB);
switch (op.RoundingMode)
{
case RoundingMode.ToNearest:
srcB = context.FPRound(srcB);
break;
case RoundingMode.TowardsNegativeInfinity:
srcB = context.FPFloor(srcB);
break;
case RoundingMode.TowardsPositiveInfinity:
srcB = context.FPCeiling(srcB);
break;
case RoundingMode.TowardsZero:
srcB = context.FPTruncate(srcB);
break;
}
RoundMode2.Round => context.FPRound(srcB),
RoundMode2.Floor => context.FPFloor(srcB),
RoundMode2.Ceil => context.FPCeiling(srcB),
RoundMode2.Trunc => context.FPTruncate(srcB),
_ => srcB
};
if (!isSignedInt)
{
// Negative float to uint cast is undefined, so we clamp
// the value before conversion.
// Negative float to uint cast is undefined, so we clamp the value before conversion.
srcB = context.FPMaximum(srcB, ConstF(0));
}
srcB = isSignedInt
? context.FPConvertToS32(srcB)
: context.FPConvertToU32(srcB);
srcB = isSignedInt ? context.FPConvertToS32(srcB) : context.FPConvertToU32(srcB);
if (isSmallInt)
{
int min = (int)GetIntMin(intType);
int max = (int)GetIntMax(intType);
int min = (int)GetIntMin(dstType);
int max = (int)GetIntMax(dstType);
srcB = isSignedInt
? context.IClampS32(srcB, Const(min), Const(max))
: context.IClampU32(srcB, Const(min), Const(max));
}
Operand dest = GetDest(context);
Operand dest = GetDest(rd);
context.Copy(dest, srcB);
// TODO: CC.
}
public static void I2F(EmitterContext context)
private static void EmitI2F(
EmitterContext context,
ISrcFmt srcType,
DstFmt dstType,
Operand src,
ByteSel byteSelection,
int rd,
bool absolute,
bool negate)
{
OpCodeAlu op = (OpCodeAlu)context.CurrOp;
FPType dstType = (FPType)op.RawOpCode.Extract(8, 2);
IntegerType srcType = (IntegerType)op.RawOpCode.Extract(10, 2);
bool isSignedInt =
srcType == ISrcFmt.S8 ||
srcType == ISrcFmt.S16 ||
srcType == ISrcFmt.S32 ||
srcType == ISrcFmt.S64;
bool isSmallInt =
srcType == ISrcFmt.U16 ||
srcType == ISrcFmt.S16 ||
srcType == ISrcFmt.U8 ||
srcType == ISrcFmt.S8;
// TODO: Handle S/U64.
bool isSmallInt = srcType <= IntegerType.U16;
bool isSignedInt = op.RawOpCode.Extract(13);
bool negateB = op.RawOpCode.Extract(45);
bool absoluteB = op.RawOpCode.Extract(49);
Operand srcB = context.IAbsNeg(GetSrcB(context), absoluteB, negateB);
Operand srcB = context.IAbsNeg(src, absolute, negate);
if (isSmallInt)
{
int size = srcType == IntegerType.U16 ? 16 : 8;
int size = srcType == ISrcFmt.U16 || srcType == ISrcFmt.S16 ? 16 : 8;
srcB = isSignedInt
? context.BitfieldExtractS32(srcB, Const(op.ByteSelection * 8), Const(size))
: context.BitfieldExtractU32(srcB, Const(op.ByteSelection * 8), Const(size));
? context.BitfieldExtractS32(srcB, Const((int)byteSelection * 8), Const(size))
: context.BitfieldExtractU32(srcB, Const((int)byteSelection * 8), Const(size));
}
srcB = isSignedInt
? context.IConvertS32ToFP(srcB)
: context.IConvertU32ToFP(srcB);
srcB = isSignedInt ? context.IConvertS32ToFP(srcB) : context.IConvertU32ToFP(srcB);
WriteFP(context, dstType, srcB);
WriteFP(context, dstType, srcB, rd);
// TODO: CC.
}
public static void I2I(EmitterContext context)
private static void EmitI2I(
EmitterContext context,
ISrcFmt srcType,
IDstFmt dstType,
Operand src,
ByteSel byteSelection,
int rd,
bool absolute,
bool negate,
bool saturate)
{
OpCodeAlu op = (OpCodeAlu)context.CurrOp;
IntegerType dstType = (IntegerType)op.RawOpCode.Extract(8, 2);
IntegerType srcType = (IntegerType)op.RawOpCode.Extract(10, 2);
if (srcType == IntegerType.U64 || dstType == IntegerType.U64)
if (srcType == ISrcFmt.U64 || dstType == IDstFmt.U64)
{
context.Config.GpuAccessor.Log("Invalid I2I encoding.");
return;
}
bool srcIsSmallInt = srcType <= IntegerType.U16;
bool dstIsSignedInt = op.RawOpCode.Extract(12);
bool srcIsSignedInt = op.RawOpCode.Extract(13);
bool negateB = op.RawOpCode.Extract(45);
bool absoluteB = op.RawOpCode.Extract(49);
Operand srcB = GetSrcB(context);
bool srcIsSignedInt =
srcType == ISrcFmt.S8 ||
srcType == ISrcFmt.S16 ||
srcType == ISrcFmt.S32 ||
srcType == ISrcFmt.S64;
bool dstIsSignedInt =
dstType == IDstFmt.S16 ||
dstType == IDstFmt.S32 ||
dstType == IDstFmt.S64;
bool srcIsSmallInt =
srcType == ISrcFmt.U16 ||
srcType == ISrcFmt.S16 ||
srcType == ISrcFmt.U8 ||
srcType == ISrcFmt.S8;
if (srcIsSmallInt)
{
int size = srcType == IntegerType.U16 ? 16 : 8;
int size = srcType == ISrcFmt.U16 || srcType == ISrcFmt.S16 ? 16 : 8;
srcB = srcIsSignedInt
? context.BitfieldExtractS32(srcB, Const(op.ByteSelection * 8), Const(size))
: context.BitfieldExtractU32(srcB, Const(op.ByteSelection * 8), Const(size));
src = srcIsSignedInt
? context.BitfieldExtractS32(src, Const((int)byteSelection * 8), Const(size))
: context.BitfieldExtractU32(src, Const((int)byteSelection * 8), Const(size));
}
srcB = context.IAbsNeg(srcB, absoluteB, negateB);
src = context.IAbsNeg(src, absolute, negate);
if (op.Saturate)
if (saturate)
{
if (dstIsSignedInt)
{
dstType |= IntegerType.S8;
}
int min = (int)GetIntMin(dstType);
int max = (int)GetIntMax(dstType);
srcB = dstIsSignedInt
? context.IClampS32(srcB, Const(min), Const(max))
: context.IClampU32(srcB, Const(min), Const(max));
src = dstIsSignedInt
? context.IClampS32(src, Const(min), Const(max))
: context.IClampU32(src, Const(min), Const(max));
}
context.Copy(GetDest(context), srcB);
context.Copy(GetDest(rd), src);
// TODO: CC.
}
private static void WriteFP(EmitterContext context, FPType type, Operand srcB)
private static Operand UnpackReg(EmitterContext context, DstFmt floatType, bool h, int reg)
{
Operand dest = GetDest(context);
if (floatType == DstFmt.F32)
{
return GetSrcReg(context, reg);
}
else if (floatType == DstFmt.F16)
{
return GetHalfUnpacked(context, GetSrcReg(context, reg), HalfSwizzle.F16)[h ? 1 : 0];
}
else if (floatType == DstFmt.F64)
{
return GetSrcReg(context, reg, isFP64: true);
}
if (type == FPType.FP32)
throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
}
/// <summary>
/// Reads a floating point source from a constant buffer according to <paramref name="floatType"/>.
/// For F16 the value is unpacked as a half pair and <paramref name="h"/> selects element 1 instead of element 0.
/// </summary>
/// <exception cref="ArgumentException">The type is not F16, F32 or F64.</exception>
private static Operand UnpackCbuf(EmitterContext context, DstFmt floatType, bool h, int cbufSlot, int cbufOffset)
{
    switch (floatType)
    {
        case DstFmt.F32:
            return GetSrcCbuf(context, cbufSlot, cbufOffset);
        case DstFmt.F16:
            return GetHalfUnpacked(context, GetSrcCbuf(context, cbufSlot, cbufOffset), HalfSwizzle.F16)[h ? 1 : 0];
        case DstFmt.F64:
            return GetSrcCbuf(context, cbufSlot, cbufOffset, isFP64: true);
        default:
            throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
    }
}
/// <summary>
/// Builds a floating point source from an immediate according to <paramref name="floatType"/>.
/// For F16 the value is unpacked as a half pair and <paramref name="h"/> selects element 1 instead of element 0.
/// </summary>
/// <exception cref="ArgumentException">The type is not F16, F32 or F64.</exception>
private static Operand UnpackImm(EmitterContext context, DstFmt floatType, bool h, int imm)
{
    switch (floatType)
    {
        case DstFmt.F32:
            return GetSrcImm(context, imm);
        case DstFmt.F16:
            return GetHalfUnpacked(context, GetSrcImm(context, imm), HalfSwizzle.F16)[h ? 1 : 0];
        case DstFmt.F64:
            return GetSrcImm(context, imm, isFP64: true);
        default:
            throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
    }
}
private static void WriteFP(EmitterContext context, DstFmt type, Operand srcB, int rd)
{
Operand dest = GetDest(rd);
if (type == DstFmt.F32)
{
context.Copy(dest, srcB);
}
else if (type == FPType.FP16)
else if (type == DstFmt.F16)
{
context.Copy(dest, context.PackHalf2x16(srcB, ConstF(0)));
}
else /* if (type == FPType.FP64) */
{
Operand dest2 = GetDest2(context);
Operand dest2 = GetDest2(rd);
context.Copy(dest, context.UnpackDouble2x32Low(srcB));
context.Copy(dest2, context.UnpackDouble2x32High(srcB));
}
}
// Maps a float format to the IR precision flag: FP64 for F64, FP32 for every other format.
private static Instruction ToInstFPType(this DstFmt type)
{
    if (type == DstFmt.F64)
    {
        return Instruction.FP64;
    }

    return Instruction.FP32;
}
}
}

View file

@ -1,547 +0,0 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
// DADD: floating point add emitted with the FP64 flag.
public static void Dadd(EmitterContext context)
{
    EmitFPAdd(context, Instruction.FP64);
}
// DFMA: fused multiply-add emitted with the FP64 flag.
public static void Dfma(EmitterContext context)
{
    EmitFPFma(context, Instruction.FP64);
}
// DMUL: floating point multiply emitted with the FP64 flag.
public static void Dmul(EmitterContext context)
{
    EmitFPMultiply(context, Instruction.FP64);
}
/// <summary>Forwards to EmitFPAdd using the FP32 type.</summary>
public static void Fadd(EmitterContext context)
{
    EmitFPAdd(context, Instruction.FP32);
}
/// <summary>
/// Compares source C against 0.0 with the condition stored at opcode bits 48..51 and
/// copies source A (comparison true) or source B (comparison false) to the destination.
/// </summary>
public static void Fcmp(EmitterContext context)
{
    Condition condition = (Condition)context.CurrOp.RawOpCode.Extract(48, 4);

    Operand whenTrue = GetSrcA(context);
    Operand whenFalse = GetSrcB(context);
    Operand tested = GetSrcC(context);

    Operand selector = GetFPComparison(context, condition, tested, ConstF(0));
    Operand selected = context.ConditionalSelect(selector, whenTrue, whenFalse);

    context.Copy(GetDest(context), selected);
}
/// <summary>Forwards to EmitFPFma using the FP32 type.</summary>
public static void Ffma(EmitterContext context)
{
    EmitFPFma(context, Instruction.FP32);
}
/// <summary>
/// Fused multiply-add where the addend is the current value of the destination register.
/// Opcode bit 55 enables saturation, bit 56 negates source A and bit 57 negates the addend.
/// </summary>
public static void Ffma32i(EmitterContext context)
{
    IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;

    bool sat = op.RawOpCode.Extract(55);
    bool negA = op.RawOpCode.Extract(56);
    bool negC = op.RawOpCode.Extract(57);

    Operand factorA = context.FPNegate(GetSrcA(context), negA);

    // The destination register doubles as the third source.
    Operand addend = context.FPNegate(GetDest(context), negC);
    Operand factorB = GetSrcB(context);

    Operand target = GetDest(context);

    Operand fma = context.FPFusedMultiplyAdd(factorA, factorB, addend);

    context.Copy(target, context.FPSaturate(fma, sat));

    SetFPZnFlags(context, target, op.SetCondCode);
}
/// <summary>
/// Computes both the minimum and the maximum of the two (optionally abs/neg modified) sources and
/// picks the minimum when the predicate from GetPredicate39 is true, the maximum otherwise.
/// </summary>
public static void Fmnmx(EmitterContext context)
{
    IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;

    // Modifier bits: 45 = negate B, 48 = negate A, 49 = absolute B.
    bool negB = op.RawOpCode.Extract(45);
    bool negA = op.RawOpCode.Extract(48);
    bool absB = op.RawOpCode.Extract(49);

    Operand lhs = context.FPAbsNeg(GetSrcA(context), op.AbsoluteA, negA);
    Operand rhs = context.FPAbsNeg(GetSrcB(context), absB, negB);

    Operand minimum = context.FPMinimum(lhs, rhs);
    Operand maximum = context.FPMaximum(lhs, rhs);

    Operand selectMin = GetPredicate39(context);

    Operand target = GetDest(context);

    context.Copy(target, context.ConditionalSelect(selectMin, minimum, maximum));

    SetFPZnFlags(context, target, op.SetCondCode);
}
/// <summary>Forwards to EmitFPMultiply using the FP32 type.</summary>
public static void Fmul(EmitterContext context)
{
    EmitFPMultiply(context, Instruction.FP32);
}
/// <summary>
/// Compares the two sources, combines the result with the predicate from GetPredicate39 through
/// the opcode logical operation, and writes it to the destination register. When opcode bit 52
/// is set the result is first converted to 1.0 / 0 with a conditional select.
/// </summary>
public static void Fset(EmitterContext context)
{
    OpCodeSet op = (OpCodeSet)context.CurrOp;

    Condition condition = (Condition)op.RawOpCode.Extract(48, 4);

    bool negA = op.RawOpCode.Extract(43);
    bool absB = op.RawOpCode.Extract(44);
    bool asFloat = op.RawOpCode.Extract(52);
    bool negB = op.RawOpCode.Extract(53);
    bool absA = op.RawOpCode.Extract(54);

    Operand lhs = context.FPAbsNeg(GetSrcA(context), absA, negA);
    Operand rhs = context.FPAbsNeg(GetSrcB(context), absB, negB);

    Operand result = GetFPComparison(context, condition, lhs, rhs);

    Operand pred = GetPredicate39(context);

    result = GetPredLogicalOp(context, op.LogicalOp, result, pred);

    Operand target = GetDest(context);

    if (!asFloat)
    {
        context.Copy(target, result);

        SetZnFlags(context, result, op.SetCondCode, op.Extended);
    }
    else
    {
        result = context.ConditionalSelect(result, ConstF(1), Const(0));

        context.Copy(target, result);

        SetFPZnFlags(context, result, op.SetCondCode);
    }

    // TODO: X
}
/// <summary>
/// Compares the two sources and writes the result and its bitwise complement, each combined with
/// the predicate from GetPredicate39, to the predicate registers Predicate3 and Predicate0.
/// </summary>
public static void Fsetp(EmitterContext context)
{
    OpCodeSet op = (OpCodeSet)context.CurrOp;

    Condition condition = (Condition)op.RawOpCode.Extract(48, 4);

    // Modifier bits: 6 = negate B, 7 = absolute A, 43 = negate A, 44 = absolute B.
    bool negB = op.RawOpCode.Extract(6);
    bool absA = op.RawOpCode.Extract(7);
    bool negA = op.RawOpCode.Extract(43);
    bool absB = op.RawOpCode.Extract(44);

    Operand lhs = context.FPAbsNeg(GetSrcA(context), absA, negA);
    Operand rhs = context.FPAbsNeg(GetSrcB(context), absB, negB);

    Operand direct = GetFPComparison(context, condition, lhs, rhs);
    Operand inverted = context.BitwiseNot(direct);

    Operand pred = GetPredicate39(context);

    direct = GetPredLogicalOp(context, op.LogicalOp, direct, pred);
    inverted = GetPredLogicalOp(context, op.LogicalOp, inverted, pred);

    context.Copy(Register(op.Predicate3), direct);
    context.Copy(Register(op.Predicate0), inverted);
}
/// <summary>
/// Emits FPSwizzleAdd on sources A and B with the 8-bit mask stored at opcode bits 28..35.
/// </summary>
public static void Fswzadd(EmitterContext context)
{
    OpCodeAlu op = (OpCodeAlu)context.CurrOp;

    int swizzleMask = op.RawOpCode.Extract(28, 8);

    Operand lhs = GetSrcA(context);
    Operand rhs = GetSrcB(context);

    Operand target = GetDest(context);

    Operand sum = context.FPSwizzleAdd(lhs, rhs, swizzleMask);

    context.Copy(target, sum);

    SetFPZnFlags(context, target, op.SetCondCode);
}
/// <summary>Half-precision pair addition; shares its implementation with Hmul2.</summary>
public static void Hadd2(EmitterContext context) => Hadd2Hmul2Impl(context, isAdd: true);
/// <summary>
/// Element-wise fused multiply-add over two-element half operands, with optional saturation.
/// The two results are packed with GetHalfPacked and copied to the destination register.
/// </summary>
public static void Hfma2(EmitterContext context)
{
    IOpCodeHfma op = (IOpCodeHfma)context.CurrOp;

    Operand[] a = GetHfmaSrcA(context);
    Operand[] b = GetHfmaSrcB(context);
    Operand[] c = GetHfmaSrcC(context);

    Operand[] results = new Operand[2];

    for (int lane = 0; lane < results.Length; lane++)
    {
        Operand fma = context.FPFusedMultiplyAdd(a[lane], b[lane], c[lane]);

        results[lane] = context.FPSaturate(fma, op.Saturate);
    }

    context.Copy(GetDest(context), GetHalfPacked(context, results));
}
/// <summary>Half-precision pair multiplication; shares its implementation with Hadd2.</summary>
public static void Hmul2(EmitterContext context) => Hadd2Hmul2Impl(context, isAdd: false);
/// <summary>
/// Common implementation of the half-precision pair add and multiply.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <param name="isAdd">True to add the two operands, false to multiply them.</param>
private static void Hadd2Hmul2Impl(EmitterContext context, bool isAdd)
{
    OpCode op = context.CurrOp;

    // The saturate flag is at bit 32 on the register form, and at bit 52 on the other forms.
    int saturateBit = op is IOpCodeReg ? 32 : 52;

    bool sat = op.RawOpCode.Extract(saturateBit);

    Operand[] a = GetHalfSrcA(context, isAdd);
    Operand[] b = GetHalfSrcB(context, !isAdd);

    Operand[] results = new Operand[2];

    for (int lane = 0; lane < results.Length; lane++)
    {
        Operand value = isAdd
            ? context.FPAdd(a[lane], b[lane])
            : context.FPMultiply(a[lane], b[lane]);

        results[lane] = context.FPSaturate(value, sat);
    }

    context.Copy(GetDest(context), GetHalfPacked(context, results));
}
/// <summary>
/// Compares both elements of two half operands, combines each result with the predicate from
/// GetPredicate39, and writes both to the destination: either as packed halves (1.0 / 0) when
/// the bool-float bit is set, or as two 16-bit masks placed in the low and high half.
/// </summary>
public static void Hset2(EmitterContext context)
{
    OpCodeSet op = (OpCodeSet)context.CurrOp;

    // The register form uses different bit positions from the other forms.
    bool regForm = op is IOpCodeReg;

    bool asFloat = op.RawOpCode.Extract(regForm ? 49 : 53);

    Condition condition = (Condition)op.RawOpCode.Extract(regForm ? 35 : 49, 4);

    Operand[] a = GetHalfSrcA(context);
    Operand[] b = GetHalfSrcB(context);

    Operand first = GetFPComparison(context, condition, a[0], b[0]);
    Operand second = GetFPComparison(context, condition, a[1], b[1]);

    Operand pred = GetPredicate39(context);

    first = GetPredLogicalOp(context, op.LogicalOp, first, pred);
    second = GetPredLogicalOp(context, op.LogicalOp, second, pred);

    if (!asFloat)
    {
        Operand lowBits = context.BitwiseAnd(first, Const(0xffff));
        Operand highBits = context.ShiftLeft(second, Const(16));

        Operand combined = context.BitwiseOr(lowBits, highBits);

        context.Copy(GetDest(context), combined);
    }
    else
    {
        first = context.ConditionalSelect(first, ConstF(1), Const(0));
        second = context.ConditionalSelect(second, ConstF(1), Const(0));

        context.Copy(GetDest(context), context.PackHalf2x16(first, second));
    }
}
/// <summary>
/// Compares both elements of two half operands and writes the two results to the predicate
/// registers Predicate3 and Predicate0. With the "H_AND" bit set, the first result becomes the
/// AND of both comparisons and the second its bitwise complement.
/// </summary>
public static void Hsetp2(EmitterContext context)
{
    OpCodeSet op = (OpCodeSet)context.CurrOp;

    // The register form uses different bit positions from the other forms.
    bool regForm = op is IOpCodeReg;

    bool combine = op.RawOpCode.Extract(regForm ? 49 : 53);

    Condition condition = (Condition)op.RawOpCode.Extract(regForm ? 35 : 49, 4);

    Operand[] a = GetHalfSrcA(context);
    Operand[] b = GetHalfSrcB(context);

    Operand first = GetFPComparison(context, condition, a[0], b[0]);
    Operand second = GetFPComparison(context, condition, a[1], b[1]);

    if (combine)
    {
        first = context.BitwiseAnd(first, second);
        second = context.BitwiseNot(first);
    }

    Operand pred = GetPredicate39(context);

    first = GetPredLogicalOp(context, op.LogicalOp, first, pred);
    second = GetPredLogicalOp(context, op.LogicalOp, second, pred);

    context.Copy(Register(op.Predicate3), first);
    context.Copy(Register(op.Predicate0), second);
}
/// <summary>
/// Applies the function selected by opcode bits 20..23 to source A (after the optional abs/neg
/// modifiers) and copies the optionally saturated result to the destination register.
/// Operations without a case below pass the input through unchanged.
/// </summary>
public static void Mufu(EmitterContext context)
{
    IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;

    bool negate = op.RawOpCode.Extract(48);

    Operand input = context.FPAbsNeg(GetSrcA(context), op.AbsoluteA, negate);

    MufuOperation function = (MufuOperation)context.CurrOp.RawOpCode.Extract(20, 4);

    Operand output = function switch
    {
        MufuOperation.Cosine => context.FPCosine(input),
        MufuOperation.Sine => context.FPSine(input),
        MufuOperation.ExponentB2 => context.FPExponentB2(input),
        MufuOperation.LogarithmB2 => context.FPLogarithmB2(input),
        MufuOperation.Reciprocal => context.FPReciprocal(input),
        MufuOperation.ReciprocalSquareRoot => context.FPReciprocalSquareRoot(input),
        MufuOperation.SquareRoot => context.FPSquareRoot(input),
        _ => input // TODO
    };

    context.Copy(GetDest(context), context.FPSaturate(output, op.Saturate));
}
/// <summary>
/// Emits a floating point addition of sources A and B with abs/neg modifiers and saturation.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <param name="fpType">Instruction.FP32 or Instruction.FP64.</param>
private static void EmitFPAdd(EmitterContext context, Instruction fpType)
{
    IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;

    bool isFP64 = fpType == Instruction.FP64;

    // The 32-bit immediate form keeps its B/A modifier bits at different positions.
    bool isImm32 = op is OpCodeFArithImm32;

    bool absA = op.AbsoluteA;
    bool negB = op.RawOpCode.Extract(isImm32 ? 53 : 45);
    bool negA = op.RawOpCode.Extract(isImm32 ? 56 : 48);
    bool absB = op.RawOpCode.Extract(isImm32 ? 57 : 49);

    Operand lhs = context.FPAbsNeg(GetSrcA(context, isFP64), absA, negA, fpType);
    Operand rhs = context.FPAbsNeg(GetSrcB(context, isFP64), absB, negB, fpType);

    Operand sum = context.FPAdd(lhs, rhs, fpType);
    Operand result = context.FPSaturate(sum, op.Saturate, fpType);

    SetDest(context, result, isFP64);

    SetFPZnFlags(context, result, op.SetCondCode, fpType);
}
/// <summary>
/// Emits a fused multiply-add of sources A, B and C. Opcode bit 48 negates B and bit 49 negates C.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <param name="fpType">Instruction.FP32 or Instruction.FP64.</param>
private static void EmitFPFma(EmitterContext context, Instruction fpType)
{
    IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;

    bool isFP64 = fpType == Instruction.FP64;

    bool negB = op.RawOpCode.Extract(48);
    bool negC = op.RawOpCode.Extract(49);

    Operand factorA = GetSrcA(context, isFP64);
    Operand factorB = context.FPNegate(GetSrcB(context, isFP64), negB, fpType);
    Operand addend = context.FPNegate(GetSrcC(context, isFP64), negC, fpType);

    Operand fma = context.FPFusedMultiplyAdd(factorA, factorB, addend, fpType);
    Operand result = context.FPSaturate(fma, op.Saturate, fpType);

    SetDest(context, result, isFP64);

    SetFPZnFlags(context, result, op.SetCondCode, fpType);
}
/// <summary>
/// Emits a floating point multiplication of sources A and B, with an optional power-of-two
/// scale applied to A first, optional negation of B and optional saturation.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <param name="fpType">Instruction.FP32 or Instruction.FP64.</param>
private static void EmitFPMultiply(EmitterContext context, Instruction fpType)
{
    IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;

    bool isFP64 = fpType == Instruction.FP64;
    bool isImm32 = op is OpCodeFArithImm32;

    // The 32-bit immediate form has no negate flag for B.
    bool negB = !isImm32 && op.RawOpCode.Extract(48);

    Operand lhs = GetSrcA(context, isFP64);
    Operand rhs = context.FPNegate(GetSrcB(context, isFP64), negB, fpType);

    if (op.Scale != FPMultiplyScale.None)
    {
        float factor = op.Scale switch
        {
            FPMultiplyScale.Divide2 => 0.5f,
            FPMultiplyScale.Divide4 => 0.25f,
            FPMultiplyScale.Divide8 => 0.125f,
            FPMultiplyScale.Multiply2 => 2f,
            FPMultiplyScale.Multiply4 => 4f,
            FPMultiplyScale.Multiply8 => 8f,
            _ => 1f // Invalid, behave as if it had no scale.
        };

        Operand scale = ConstF(factor);

        // A factor of 1 can only come from the fallback arm above.
        if (factor == 1f)
        {
            context.Config.GpuAccessor.Log($"Invalid FP multiply scale \"{op.Scale}\".");
        }

        if (isFP64)
        {
            scale = context.FP32ConvertToFP64(scale);
        }

        lhs = context.FPMultiply(lhs, scale, fpType);
    }

    // On the 32-bit immediate form the saturate flag is read from bit 55.
    bool sat = isImm32 ? op.RawOpCode.Extract(55) : op.Saturate;

    Operand product = context.FPMultiply(lhs, rhs, fpType);
    Operand result = context.FPSaturate(product, sat, fpType);

    SetDest(context, result, isFP64);

    SetFPZnFlags(context, result, op.SetCondCode, fpType);
}
/// <summary>
/// Builds the operand holding the result of a FP32 comparison between two operands.
/// </summary>
/// <param name="context">Emitter context used to emit the comparison.</param>
/// <param name="cond">Comparison condition, optionally combined with the Nan flag.</param>
/// <param name="srcA">Left hand side operand.</param>
/// <param name="srcB">Right hand side operand.</param>
/// <returns>Operand with the comparison result.</returns>
/// <exception cref="InvalidOperationException">The condition is not one of the handled values.</exception>
private static Operand GetFPComparison(
    EmitterContext context,
    Condition cond,
    Operand srcA,
    Operand srcB)
{
    if (cond == Condition.Always)
    {
        return Const(IrConsts.True);
    }

    if (cond == Condition.Never)
    {
        return Const(IrConsts.False);
    }

    if (cond == Condition.Nan || cond == Condition.Number)
    {
        // "Nan" is true when either operand is NaN, "Number" is its complement.
        Operand anyNan = context.BitwiseOr(context.IsNan(srcA), context.IsNan(srcB));

        return cond == Condition.Number ? context.BitwiseNot(anyNan) : anyNan;
    }

    Instruction comparison = (cond & ~Condition.Nan) switch
    {
        Condition.Less => Instruction.CompareLess,
        Condition.Equal => Instruction.CompareEqual,
        Condition.LessOrEqual => Instruction.CompareLessOrEqual,
        Condition.Greater => Instruction.CompareGreater,
        Condition.NotEqual => Instruction.CompareNotEqual,
        Condition.GreaterOrEqual => Instruction.CompareGreaterOrEqual,
        _ => throw new InvalidOperationException($"Unexpected condition \"{cond}\".")
    };

    Operand result = context.Add(comparison | Instruction.FP32, Local(), srcA, srcB);

    // With the Nan flag set, the comparison is also true when either operand is NaN.
    if ((cond & Condition.Nan) != 0)
    {
        result = context.BitwiseOr(result, context.IsNan(srcA));
        result = context.BitwiseOr(result, context.IsNan(srcB));
    }

    return result;
}
/// <summary>Returns source A unpacked into its half elements using the opcode A swizzle.</summary>
private static Operand[] GetHfmaSrcA(EmitterContext context)
{
    HalfSwizzle swizzle = ((IOpCodeHfma)context.CurrOp).SwizzleA;

    return GetHalfUnpacked(context, GetSrcA(context), swizzle);
}
/// <summary>
/// Returns source B unpacked into its half elements using the opcode B swizzle,
/// negated when the opcode NegateB flag is set.
/// </summary>
private static Operand[] GetHfmaSrcB(EmitterContext context)
{
    IOpCodeHfma op = (IOpCodeHfma)context.CurrOp;

    Operand[] halves = GetHalfUnpacked(context, GetSrcB(context), op.SwizzleB);

    return FPAbsNeg(context, halves, false, op.NegateB);
}
/// <summary>
/// Returns source C unpacked into its half elements using the opcode C swizzle,
/// negated when the opcode NegateC flag is set.
/// </summary>
private static Operand[] GetHfmaSrcC(EmitterContext context)
{
    IOpCodeHfma op = (IOpCodeHfma)context.CurrOp;

    Operand[] halves = GetHalfUnpacked(context, GetSrcC(context), op.SwizzleC);

    return FPAbsNeg(context, halves, false, op.NegateC);
}
/// <summary>
/// Writes a result to the destination of the current opcode.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <param name="value">Value to be written.</param>
/// <param name="isFP64">
/// When true, the value is split into low and high 32-bit parts, written to the
/// register Rd and to the register whose index is (Rd | 1).
/// </param>
private static void SetDest(EmitterContext context, Operand value, bool isFP64)
{
    if (!isFP64)
    {
        context.Copy(GetDest(context), value);

        return;
    }

    IOpCodeRd op = (IOpCodeRd)context.CurrOp;

    context.Copy(Register(op.Rd.Index, op.Rd.Type), context.UnpackDouble2x32Low(value));
    context.Copy(Register(op.Rd.Index | 1, op.Rd.Type), context.UnpackDouble2x32High(value));
}
}
}

View file

@ -0,0 +1,545 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>FP64 add with both operands read from registers. Saturation is always off.</summary>
public static void DaddR(EmitterContext context)
{
    InstDaddR inst = context.GetOp<InstDaddR>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);

    EmitFadd(context, Instruction.FP64, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, false, inst.WriteCC);
}
/// <summary>FP64 add of a register and a 20-bit immediate. Saturation is always off.</summary>
public static void DaddI(EmitterContext context)
{
    InstDaddI inst = context.GetOp<InstDaddI>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);

    EmitFadd(context, Instruction.FP64, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, false, inst.WriteCC);
}
/// <summary>FP64 add of a register and a constant buffer value. Saturation is always off.</summary>
public static void DaddC(EmitterContext context)
{
    InstDaddC inst = context.GetOp<InstDaddC>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);

    EmitFadd(context, Instruction.FP64, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, false, inst.WriteCC);
}
/// <summary>FP64 fused multiply-add with all three operands read from registers.</summary>
public static void DfmaR(EmitterContext context)
{
    InstDfmaR inst = context.GetOp<InstDfmaR>();

    Operand factorA = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand factorB = GetSrcReg(context, inst.SrcB, isFP64: true);
    Operand addend = GetSrcReg(context, inst.SrcC, isFP64: true);

    EmitFfma(context, Instruction.FP64, factorA, factorB, addend, inst.Dest, inst.NegA, inst.NegC, false, inst.WriteCC);
}
/// <summary>FP64 fused multiply-add where the second factor is a 20-bit immediate.</summary>
public static void DfmaI(EmitterContext context)
{
    InstDfmaI inst = context.GetOp<InstDfmaI>();

    Operand factorA = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand factorB = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);
    Operand addend = GetSrcReg(context, inst.SrcC, isFP64: true);

    EmitFfma(context, Instruction.FP64, factorA, factorB, addend, inst.Dest, inst.NegA, inst.NegC, false, inst.WriteCC);
}
/// <summary>FP64 fused multiply-add where the second factor comes from a constant buffer.</summary>
public static void DfmaC(EmitterContext context)
{
    InstDfmaC inst = context.GetOp<InstDfmaC>();

    Operand factorA = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand factorB = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);
    Operand addend = GetSrcReg(context, inst.SrcC, isFP64: true);

    EmitFfma(context, Instruction.FP64, factorA, factorB, addend, inst.Dest, inst.NegA, inst.NegC, false, inst.WriteCC);
}
/// <summary>
/// FP64 fused multiply-add where the second factor is the register named by SrcC
/// and the addend comes from a constant buffer.
/// </summary>
public static void DfmaRc(EmitterContext context)
{
    InstDfmaRc inst = context.GetOp<InstDfmaRc>();

    Operand factorA = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand factorB = GetSrcReg(context, inst.SrcC, isFP64: true);
    Operand addend = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);

    EmitFfma(context, Instruction.FP64, factorA, factorB, addend, inst.Dest, inst.NegA, inst.NegC, false, inst.WriteCC);
}
/// <summary>FP64 multiply with both operands read from registers; no scale, no saturation.</summary>
public static void DmulR(EmitterContext context)
{
    InstDmulR inst = context.GetOp<InstDmulR>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);

    EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, lhs, rhs, inst.Dest, inst.NegA, false, inst.WriteCC);
}
/// <summary>FP64 multiply of a register by a 20-bit immediate; no scale, no saturation.</summary>
public static void DmulI(EmitterContext context)
{
    InstDmulI inst = context.GetOp<InstDmulI>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);

    EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, lhs, rhs, inst.Dest, inst.NegA, false, inst.WriteCC);
}
/// <summary>FP64 multiply of a register by a constant buffer value; no scale, no saturation.</summary>
public static void DmulC(EmitterContext context)
{
    InstDmulC inst = context.GetOp<InstDmulC>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);

    EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, lhs, rhs, inst.Dest, inst.NegA, false, inst.WriteCC);
}
/// <summary>FP32 add with both operands read from registers.</summary>
public static void FaddR(EmitterContext context)
{
    InstFaddR inst = context.GetOp<InstFaddR>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcReg(context, inst.SrcB);

    EmitFadd(context, Instruction.FP32, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, inst.Sat, inst.WriteCC);
}
/// <summary>FP32 add of a register and a 20-bit immediate.</summary>
public static void FaddI(EmitterContext context)
{
    InstFaddI inst = context.GetOp<InstFaddI>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));

    EmitFadd(context, Instruction.FP32, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, inst.Sat, inst.WriteCC);
}
/// <summary>FP32 add of a register and a constant buffer value.</summary>
public static void FaddC(EmitterContext context)
{
    InstFaddC inst = context.GetOp<InstFaddC>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitFadd(context, Instruction.FP32, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, inst.Sat, inst.WriteCC);
}
/// <summary>FP32 add of a register and a 32-bit immediate. Saturation is always off.</summary>
public static void Fadd32i(EmitterContext context)
{
    InstFadd32i inst = context.GetOp<InstFadd32i>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, inst.Imm32);

    EmitFadd(context, Instruction.FP32, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, false, inst.WriteCC);
}
/// <summary>FP32 fused multiply-add with all three operands read from registers.</summary>
public static void FfmaR(EmitterContext context)
{
    InstFfmaR inst = context.GetOp<InstFfmaR>();

    Operand factorA = GetSrcReg(context, inst.SrcA);
    Operand factorB = GetSrcReg(context, inst.SrcB);
    Operand addend = GetSrcReg(context, inst.SrcC);

    EmitFfma(context, Instruction.FP32, factorA, factorB, addend, inst.Dest, inst.NegA, inst.NegC, inst.Sat, inst.WriteCC);
}
/// <summary>FP32 fused multiply-add where the second factor is a 20-bit immediate.</summary>
public static void FfmaI(EmitterContext context)
{
    InstFfmaI inst = context.GetOp<InstFfmaI>();

    Operand factorA = GetSrcReg(context, inst.SrcA);
    Operand factorB = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
    Operand addend = GetSrcReg(context, inst.SrcC);

    EmitFfma(context, Instruction.FP32, factorA, factorB, addend, inst.Dest, inst.NegA, inst.NegC, inst.Sat, inst.WriteCC);
}
/// <summary>FP32 fused multiply-add where the second factor comes from a constant buffer.</summary>
public static void FfmaC(EmitterContext context)
{
    InstFfmaC inst = context.GetOp<InstFfmaC>();

    Operand factorA = GetSrcReg(context, inst.SrcA);
    Operand factorB = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
    Operand addend = GetSrcReg(context, inst.SrcC);

    EmitFfma(context, Instruction.FP32, factorA, factorB, addend, inst.Dest, inst.NegA, inst.NegC, inst.Sat, inst.WriteCC);
}
/// <summary>
/// FP32 fused multiply-add where the second factor is the register named by SrcC
/// and the addend comes from a constant buffer.
/// </summary>
public static void FfmaRc(EmitterContext context)
{
    InstFfmaRc inst = context.GetOp<InstFfmaRc>();

    Operand factorA = GetSrcReg(context, inst.SrcA);
    Operand factorB = GetSrcReg(context, inst.SrcC);
    Operand addend = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitFfma(context, Instruction.FP32, factorA, factorB, addend, inst.Dest, inst.NegA, inst.NegC, inst.Sat, inst.WriteCC);
}
/// <summary>FP32 fused multiply-add where the second factor is a 32-bit immediate.</summary>
public static void Ffma32i(EmitterContext context)
{
    InstFfma32i inst = context.GetOp<InstFfma32i>();

    Operand factorA = GetSrcReg(context, inst.SrcA);
    Operand factorB = GetSrcImm(context, inst.Imm32);

    // NOTE(review): on a 32-bit immediate form the addend is presumably the destination
    // register - confirm that InstFfma32i.SrcC resolves to the same register as Dest.
    Operand addend = GetSrcReg(context, inst.SrcC);

    EmitFfma(context, Instruction.FP32, factorA, factorB, addend, inst.Dest, inst.NegA, inst.NegC, inst.Sat, inst.WriteCC);
}
/// <summary>FP32 multiply with both operands read from registers.</summary>
public static void FmulR(EmitterContext context)
{
    InstFmulR inst = context.GetOp<InstFmulR>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcReg(context, inst.SrcB);

    EmitFmul(context, Instruction.FP32, inst.Scale, lhs, rhs, inst.Dest, inst.NegA, inst.Sat, inst.WriteCC);
}
/// <summary>FP32 multiply of a register by a 20-bit immediate.</summary>
public static void FmulI(EmitterContext context)
{
    InstFmulI inst = context.GetOp<InstFmulI>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));

    EmitFmul(context, Instruction.FP32, inst.Scale, lhs, rhs, inst.Dest, inst.NegA, inst.Sat, inst.WriteCC);
}
/// <summary>FP32 multiply of a register by a constant buffer value.</summary>
public static void FmulC(EmitterContext context)
{
    InstFmulC inst = context.GetOp<InstFmulC>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitFmul(context, Instruction.FP32, inst.Scale, lhs, rhs, inst.Dest, inst.NegA, inst.Sat, inst.WriteCC);
}
/// <summary>FP32 multiply of a register by a 32-bit immediate; no scale and no negation.</summary>
public static void Fmul32i(EmitterContext context)
{
    InstFmul32i inst = context.GetOp<InstFmul32i>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, inst.Imm32);

    EmitFmul(context, Instruction.FP32, MultiplyScale.NoScale, lhs, rhs, inst.Dest, false, inst.Sat, inst.WriteCC);
}
/// <summary>Half pair add with both operands read from registers.</summary>
public static void Hadd2R(EmitterContext context)
{
    InstHadd2R inst = context.GetOp<InstHadd2R>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] rhs = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegB, inst.AbsB);

    EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: true, inst.Dest, inst.Sat);
}
/// <summary>Half pair add of a register and a pair of half immediates.</summary>
public static void Hadd2I(EmitterContext context)
{
    InstHadd2I inst = context.GetOp<InstHadd2I>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] rhs = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);

    EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: true, inst.Dest, inst.Sat);
}
/// <summary>Half pair add of a register and a constant buffer value (read with the F32 swizzle).</summary>
public static void Hadd2C(EmitterContext context)
{
    InstHadd2C inst = context.GetOp<InstHadd2C>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegB, inst.AbsB);

    EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: true, inst.Dest, inst.Sat);
}
/// <summary>Half pair add of a register and a 32-bit immediate; the output format is fixed to F16.</summary>
public static void Hadd232i(EmitterContext context)
{
    InstHadd232i inst = context.GetOp<InstHadd232i>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, false);
    Operand[] rhs = GetHalfSrc(context, inst.Imm);

    EmitHadd2Hmul2(context, OFmt.F16, lhs, rhs, isAdd: true, inst.Dest, inst.Sat);
}
/// <summary>
/// Half pair fused multiply-add with all operands read from registers.
/// The NegA flag is applied to the second factor.
/// </summary>
public static void Hfma2R(EmitterContext context)
{
    InstHfma2R inst = context.GetOp<InstHfma2R>();

    Operand[] factorA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
    Operand[] factorB = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegA, false);
    Operand[] addend = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegC, false);

    EmitHfma2(context, inst.OFmt, factorA, factorB, addend, inst.Dest, inst.Sat);
}
/// <summary>Half pair fused multiply-add where the second factor is a pair of half immediates.</summary>
public static void Hfma2I(EmitterContext context)
{
    InstHfma2I inst = context.GetOp<InstHfma2I>();

    Operand[] factorA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
    Operand[] factorB = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);
    Operand[] addend = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegC, false);

    EmitHfma2(context, inst.OFmt, factorA, factorB, addend, inst.Dest, inst.Sat);
}
/// <summary>
/// Half pair fused multiply-add where the second factor comes from a constant buffer
/// (read with the F32 swizzle). The NegA flag is applied to that second factor.
/// </summary>
public static void Hfma2C(EmitterContext context)
{
    InstHfma2C inst = context.GetOp<InstHfma2C>();

    Operand[] factorA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
    Operand[] factorB = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegA, false);
    Operand[] addend = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegC, false);

    EmitHfma2(context, inst.OFmt, factorA, factorB, addend, inst.Dest, inst.Sat);
}
/// <summary>
/// Half pair fused multiply-add where the second factor is the register named by SrcC
/// and the addend comes from a constant buffer (read with the F32 swizzle).
/// </summary>
public static void Hfma2Rc(EmitterContext context)
{
    InstHfma2Rc inst = context.GetOp<InstHfma2Rc>();

    Operand[] factorA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
    Operand[] factorB = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegA, false);
    Operand[] addend = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegC, false);

    EmitHfma2(context, inst.OFmt, factorA, factorB, addend, inst.Dest, inst.Sat);
}
/// <summary>
/// Half pair fused multiply-add where the second factor is a 32-bit immediate.
/// The output format is fixed to F16 and saturation is always off.
/// </summary>
public static void Hfma232iI(EmitterContext context)
{
    InstHfma232i inst = context.GetOp<InstHfma232i>();

    Operand[] factorA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
    Operand[] factorB = GetHalfSrc(context, inst.Imm);

    // NOTE(review): on a 32-bit immediate form the addend is presumably the destination
    // register - confirm that InstHfma232i.SrcC resolves to the same register as Dest.
    Operand[] addend = GetHalfSrc(context, HalfSwizzle.F16, inst.SrcC, inst.NegC, false);

    EmitHfma2(context, OFmt.F16, factorA, factorB, addend, inst.Dest, saturate: false);
}
/// <summary>
/// Half pair multiply with both operands read from registers.
/// The NegA flag is applied to the second operand.
/// </summary>
public static void Hmul2R(EmitterContext context)
{
    InstHmul2R inst = context.GetOp<InstHmul2R>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, inst.AbsA);
    Operand[] rhs = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegA, inst.AbsB);

    EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: false, inst.Dest, inst.Sat);
}
/// <summary>Half pair multiply of a register by a pair of half immediates.</summary>
public static void Hmul2I(EmitterContext context)
{
    InstHmul2I inst = context.GetOp<InstHmul2I>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] rhs = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);

    EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: false, inst.Dest, inst.Sat);
}
/// <summary>
/// Half pair multiply of a register by a constant buffer value (read with the F32 swizzle).
/// The NegA flag is applied to the constant buffer operand.
/// </summary>
public static void Hmul2C(EmitterContext context)
{
    InstHmul2C inst = context.GetOp<InstHmul2C>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, inst.AbsA);
    Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegA, inst.AbsB);

    EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: false, inst.Dest, inst.Sat);
}
/// <summary>Half pair multiply of a register by a 32-bit immediate; the output format is fixed to F16.</summary>
public static void Hmul232i(EmitterContext context)
{
    InstHmul232i inst = context.GetOp<InstHmul232i>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
    Operand[] rhs = GetHalfSrc(context, inst.Imm32);

    EmitHadd2Hmul2(context, OFmt.F16, lhs, rhs, isAdd: false, inst.Dest, inst.Sat);
}
/// <summary>
/// Emits a floating point addition: applies the abs/neg modifiers to both operands, adds them,
/// optionally saturates the sum, writes it to the destination and updates the FP flags.
/// </summary>
/// <param name="context">Emitter context.</param>
/// <param name="fpType">Instruction.FP32 or Instruction.FP64.</param>
/// <param name="srcA">First operand.</param>
/// <param name="srcB">Second operand.</param>
/// <param name="rd">Destination register index.</param>
/// <param name="negateA">Negates the first operand.</param>
/// <param name="negateB">Negates the second operand.</param>
/// <param name="absoluteA">Takes the absolute value of the first operand.</param>
/// <param name="absoluteB">Takes the absolute value of the second operand.</param>
/// <param name="saturate">Forwarded to FPSaturate for the result.</param>
/// <param name="writeCC">Forwarded to SetFPZnFlags.</param>
private static void EmitFadd(
    EmitterContext context,
    Instruction fpType,
    Operand srcA,
    Operand srcB,
    int rd,
    bool negateA,
    bool negateB,
    bool absoluteA,
    bool absoluteB,
    bool saturate,
    bool writeCC)
{
    Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
    Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

    Operand sum = context.FPAdd(lhs, rhs, fpType);
    Operand result = context.FPSaturate(sum, saturate, fpType);

    SetDest(context, result, rd, fpType == Instruction.FP64);

    SetFPZnFlags(context, result, writeCC, fpType);
}
/// <summary>
/// Emits a fused multiply-add (srcA * srcB + srcC) with optional negation of the second factor
/// and of the addend, optional saturation, destination write and FP flags update.
/// </summary>
/// <param name="context">Emitter context.</param>
/// <param name="fpType">Instruction.FP32 or Instruction.FP64.</param>
/// <param name="srcA">First factor.</param>
/// <param name="srcB">Second factor.</param>
/// <param name="srcC">Addend.</param>
/// <param name="rd">Destination register index.</param>
/// <param name="negateB">Negates the second factor (callers in this file pass the instruction NegA flag).</param>
/// <param name="negateC">Negates the addend.</param>
/// <param name="saturate">Forwarded to FPSaturate for the result.</param>
/// <param name="writeCC">Forwarded to SetFPZnFlags.</param>
private static void EmitFfma(
    EmitterContext context,
    Instruction fpType,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    bool negateB,
    bool negateC,
    bool saturate,
    bool writeCC)
{
    Operand factorB = context.FPNegate(srcB, negateB, fpType);
    Operand addend = context.FPNegate(srcC, negateC, fpType);

    Operand fma = context.FPFusedMultiplyAdd(srcA, factorB, addend, fpType);
    Operand result = context.FPSaturate(fma, saturate, fpType);

    SetDest(context, result, rd, fpType == Instruction.FP64);

    SetFPZnFlags(context, result, writeCC, fpType);
}
/// <summary>
/// Emits a floating point multiplication with an optional power-of-two scale applied to the
/// first operand, optional negation of the second operand, optional saturation, destination
/// write and FP flags update.
/// </summary>
/// <param name="context">Emitter context.</param>
/// <param name="fpType">Instruction.FP32 or Instruction.FP64.</param>
/// <param name="scale">Scale applied to <paramref name="srcA"/> before the multiplication.</param>
/// <param name="srcA">First operand.</param>
/// <param name="srcB">Second operand.</param>
/// <param name="rd">Destination register index.</param>
/// <param name="negateB">Negates the second operand (callers in this file pass the instruction NegA flag).</param>
/// <param name="saturate">Forwarded to FPSaturate for the result.</param>
/// <param name="writeCC">Forwarded to SetFPZnFlags.</param>
private static void EmitFmul(
    EmitterContext context,
    Instruction fpType,
    MultiplyScale scale,
    Operand srcA,
    Operand srcB,
    int rd,
    bool negateB,
    bool saturate,
    bool writeCC)
{
    bool isFP64 = fpType == Instruction.FP64;

    srcB = context.FPNegate(srcB, negateB, fpType);

    if (scale != MultiplyScale.NoScale)
    {
        float factor = scale switch
        {
            MultiplyScale.D2 => 0.5f,
            MultiplyScale.D4 => 0.25f,
            MultiplyScale.D8 => 0.125f,
            MultiplyScale.M2 => 2f,
            MultiplyScale.M4 => 4f,
            MultiplyScale.M8 => 8f,
            _ => 1f // Invalid, behave as if it had no scale.
        };

        Operand scaleOperand = ConstF(factor);

        // A factor of 1 can only come from the fallback arm above.
        if (factor == 1f)
        {
            context.Config.GpuAccessor.Log($"Invalid FP multiply scale \"{scale}\".");
        }

        if (isFP64)
        {
            scaleOperand = context.FP32ConvertToFP64(scaleOperand);
        }

        srcA = context.FPMultiply(srcA, scaleOperand, fpType);
    }

    Operand product = context.FPMultiply(srcA, srcB, fpType);
    Operand result = context.FPSaturate(product, saturate, fpType);

    SetDest(context, result, rd, isFP64);

    SetFPZnFlags(context, result, writeCC, fpType);
}
/// <summary>
/// Emits an element-wise add or multiply over two-element half operands, optionally saturating
/// each result, then packs both with GetHalfPacked and copies them to the destination register.
/// </summary>
/// <param name="context">Emitter context.</param>
/// <param name="swizzle">Output format forwarded to GetHalfPacked.</param>
/// <param name="srcA">First operand pair.</param>
/// <param name="srcB">Second operand pair.</param>
/// <param name="isAdd">True to add, false to multiply.</param>
/// <param name="rd">Destination register index.</param>
/// <param name="saturate">Forwarded to FPSaturate for each element.</param>
private static void EmitHadd2Hmul2(
    EmitterContext context,
    OFmt swizzle,
    Operand[] srcA,
    Operand[] srcB,
    bool isAdd,
    int rd,
    bool saturate)
{
    Operand[] results = new Operand[2];

    for (int lane = 0; lane < results.Length; lane++)
    {
        Operand value = isAdd
            ? context.FPAdd(srcA[lane], srcB[lane])
            : context.FPMultiply(srcA[lane], srcB[lane]);

        results[lane] = context.FPSaturate(value, saturate);
    }

    context.Copy(GetDest(rd), GetHalfPacked(context, swizzle, results, rd));
}
/// <summary>
/// Emits an element-wise fused multiply-add over two-element half operands, optionally saturating
/// each result, then packs both with GetHalfPacked and copies them to the destination register.
/// </summary>
/// <param name="context">Emitter context.</param>
/// <param name="swizzle">Output format forwarded to GetHalfPacked.</param>
/// <param name="srcA">First factor pair.</param>
/// <param name="srcB">Second factor pair.</param>
/// <param name="srcC">Addend pair.</param>
/// <param name="rd">Destination register index.</param>
/// <param name="saturate">Forwarded to FPSaturate for each element.</param>
public static void EmitHfma2(
    EmitterContext context,
    OFmt swizzle,
    Operand[] srcA,
    Operand[] srcB,
    Operand[] srcC,
    int rd,
    bool saturate)
{
    Operand[] results = new Operand[2];

    for (int lane = 0; lane < results.Length; lane++)
    {
        Operand fma = context.FPFusedMultiplyAdd(srcA[lane], srcB[lane], srcC[lane]);

        results[lane] = context.FPSaturate(fma, saturate);
    }

    context.Copy(GetDest(rd), GetHalfPacked(context, swizzle, results, rd));
}
/// <summary>
/// Writes a result to the destination register(s) selected by <paramref name="rd"/>.
/// </summary>
/// <param name="context">Emitter context.</param>
/// <param name="value">Value to be written.</param>
/// <param name="rd">Destination register index.</param>
/// <param name="isFP64">
/// When true, the value is split into low and high 32-bit parts,
/// written to GetDest(rd) and GetDest2(rd) respectively.
/// </param>
private static void SetDest(EmitterContext context, Operand value, int rd, bool isFP64)
{
    if (!isFP64)
    {
        context.Copy(GetDest(rd), value);

        return;
    }

    context.Copy(GetDest(rd), context.UnpackDouble2x32Low(value));
    context.Copy(GetDest2(rd), context.UnpackDouble2x32High(value));
}
}
}

View file

@ -0,0 +1,419 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>Float compare-and-select with all three operands read from registers.</summary>
public static void FcmpR(EmitterContext context)
{
    InstFcmpR inst = context.GetOp<InstFcmpR>();

    Operand a = GetSrcReg(context, inst.SrcA);
    Operand b = GetSrcReg(context, inst.SrcB);
    Operand c = GetSrcReg(context, inst.SrcC);

    EmitFcmp(context, inst.FComp, a, b, c, inst.Dest);
}
/// <summary>Float compare-and-select where the second operand is a 20-bit immediate.</summary>
public static void FcmpI(EmitterContext context)
{
    InstFcmpI inst = context.GetOp<InstFcmpI>();

    Operand a = GetSrcReg(context, inst.SrcA);
    Operand b = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
    Operand c = GetSrcReg(context, inst.SrcC);

    EmitFcmp(context, inst.FComp, a, b, c, inst.Dest);
}
/// <summary>Float compare-and-select where the second operand comes from a constant buffer.</summary>
public static void FcmpC(EmitterContext context)
{
    InstFcmpC inst = context.GetOp<InstFcmpC>();

    Operand a = GetSrcReg(context, inst.SrcA);
    Operand b = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
    Operand c = GetSrcReg(context, inst.SrcC);

    EmitFcmp(context, inst.FComp, a, b, c, inst.Dest);
}
/// <summary>
/// Float compare-and-select where the second operand is the register named by SrcC
/// and the third operand comes from a constant buffer.
/// </summary>
public static void FcmpRc(EmitterContext context)
{
    InstFcmpRc inst = context.GetOp<InstFcmpRc>();

    Operand a = GetSrcReg(context, inst.SrcA);
    Operand b = GetSrcReg(context, inst.SrcC);
    Operand c = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitFcmp(context, inst.FComp, a, b, c, inst.Dest);
}
/// <summary>Float set with both operands read from registers.</summary>
public static void FsetR(EmitterContext context)
{
    InstFsetR inst = context.GetOp<InstFsetR>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcReg(context, inst.SrcB);

    EmitFset(
        context,
        inst.FComp,
        inst.Bop,
        lhs,
        rhs,
        inst.SrcPred,
        inst.SrcPredInv,
        inst.Dest,
        inst.AbsA,
        inst.AbsB,
        inst.NegA,
        inst.NegB,
        inst.BVal,
        inst.WriteCC);
}
/// <summary>Float set of a register against a constant buffer value.</summary>
public static void FsetC(EmitterContext context)
{
    InstFsetC inst = context.GetOp<InstFsetC>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitFset(
        context,
        inst.FComp,
        inst.Bop,
        lhs,
        rhs,
        inst.SrcPred,
        inst.SrcPredInv,
        inst.Dest,
        inst.AbsA,
        inst.AbsB,
        inst.NegA,
        inst.NegB,
        inst.BVal,
        inst.WriteCC);
}
/// <summary>Float set of a register against a 20-bit immediate.</summary>
public static void FsetI(EmitterContext context)
{
    InstFsetI inst = context.GetOp<InstFsetI>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));

    EmitFset(
        context,
        inst.FComp,
        inst.Bop,
        lhs,
        rhs,
        inst.SrcPred,
        inst.SrcPredInv,
        inst.Dest,
        inst.AbsA,
        inst.AbsB,
        inst.NegA,
        inst.NegB,
        inst.BVal,
        inst.WriteCC);
}
/// <summary>Float set-predicate with both operands read from registers.</summary>
public static void FsetpR(EmitterContext context)
{
    InstFsetpR inst = context.GetOp<InstFsetpR>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcReg(context, inst.SrcB);

    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC);
}
/// <summary>Float set-predicate of a register against a 20-bit immediate.</summary>
public static void FsetpI(EmitterContext context)
{
    InstFsetpI inst = context.GetOp<InstFsetpI>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));

    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC);
}
/// <summary>
/// Emits FSETP with the first operand read from a register and the second from a constant buffer.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void FsetpC(EmitterContext context)
{
    InstFsetpC inst = context.GetOp<InstFsetpC>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC);
}
/// <summary>
/// Emits HSET2 with both half-pair operands unpacked from registers.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Hset2R(EmitterContext context)
{
    InstHset2R inst = context.GetOp<InstHset2R>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] rhs = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegB, inst.AbsB);

    EmitHset2(
        context,
        inst.Cmp,
        inst.Bop,
        lhs,
        rhs,
        inst.SrcPred,
        inst.SrcPredInv,
        inst.Dest,
        inst.Bval);
}
/// <summary>
/// Emits HSET2 with the first half pair unpacked from a register and the second
/// built from the two half immediates (BimmH0, BimmH1).
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Hset2I(EmitterContext context)
{
    InstHset2I inst = context.GetOp<InstHset2I>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] rhs = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);

    EmitHset2(
        context,
        inst.Cmp,
        inst.Bop,
        lhs,
        rhs,
        inst.SrcPred,
        inst.SrcPredInv,
        inst.Dest,
        inst.Bval);
}
/// <summary>
/// Emits HSET2 with the first half pair unpacked from a register and the second read
/// from a constant buffer using the F32 swizzle.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Hset2C(EmitterContext context)
{
    InstHset2C inst = context.GetOp<InstHset2C>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);

    // The absolute-value flag is always passed as false for the constant buffer operand here.
    Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegB, false);

    EmitHset2(
        context,
        inst.Cmp,
        inst.Bop,
        lhs,
        rhs,
        inst.SrcPred,
        inst.SrcPredInv,
        inst.Dest,
        inst.Bval);
}
/// <summary>
/// Emits HSETP2 with both half-pair operands unpacked from registers.
/// This encoding reads its comparison from the FComp2 field.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Hsetp2R(EmitterContext context)
{
    InstHsetp2R inst = context.GetOp<InstHsetp2R>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] rhs = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegB, inst.AbsB);

    EmitHsetp2(
        context,
        inst.FComp2,
        inst.Bop,
        lhs,
        rhs,
        inst.SrcPred,
        inst.SrcPredInv,
        inst.DestPred,
        inst.DestPredInv,
        inst.HAnd);
}
/// <summary>
/// Emits HSETP2 with the first half pair unpacked from a register and the second
/// built from the two half immediates (BimmH0, BimmH1).
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Hsetp2I(EmitterContext context)
{
    InstHsetp2I inst = context.GetOp<InstHsetp2I>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] rhs = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);

    EmitHsetp2(
        context,
        inst.FComp,
        inst.Bop,
        lhs,
        rhs,
        inst.SrcPred,
        inst.SrcPredInv,
        inst.DestPred,
        inst.DestPredInv,
        inst.HAnd);
}
/// <summary>
/// Emits HSETP2 with the first half pair unpacked from a register and the second read
/// from a constant buffer using the F32 swizzle.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Hsetp2C(EmitterContext context)
{
    InstHsetp2C inst = context.GetOp<InstHsetp2C>();

    Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegB, inst.AbsB);

    EmitHsetp2(
        context,
        inst.FComp,
        inst.Bop,
        lhs,
        rhs,
        inst.SrcPred,
        inst.SrcPredInv,
        inst.DestPred,
        inst.DestPredInv,
        inst.HAnd);
}
/// <summary>
/// Compares <paramref name="srcC"/> against the floating point constant zero and copies
/// either <paramref name="srcA"/> (comparison true) or <paramref name="srcB"/> (comparison false)
/// into the destination register.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cmpOp">Comparison applied between srcC and zero</param>
/// <param name="srcA">Value selected when the comparison holds</param>
/// <param name="srcB">Value selected when the comparison does not hold</param>
/// <param name="srcC">Value that is compared against zero</param>
/// <param name="rd">Index of the destination general purpose register</param>
private static void EmitFcmp(EmitterContext context, FComp cmpOp, Operand srcA, Operand srcB, Operand srcC, int rd)
{
    Operand condition = GetFPComparison(context, cmpOp, srcC, ConstF(0));
    Operand selected = context.ConditionalSelect(condition, srcA, srcB);

    context.Copy(GetDest(rd), selected);
}
/// <summary>
/// Shared FSET implementation: compares two operands (after optional abs/neg), combines the
/// outcome with a source predicate and writes the result to a general purpose register.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cmpOp">Floating point comparison to perform</param>
/// <param name="logicOp">Logical operation that merges the comparison with the predicate</param>
/// <param name="srcA">First compared operand</param>
/// <param name="srcB">Second compared operand</param>
/// <param name="srcPred">Index of the source predicate register</param>
/// <param name="srcPredInv">True to invert the source predicate before use</param>
/// <param name="rd">Index of the destination general purpose register</param>
/// <param name="absoluteA">Absolute-value flag forwarded to FPAbsNeg for srcA</param>
/// <param name="absoluteB">Absolute-value flag forwarded to FPAbsNeg for srcB</param>
/// <param name="negateA">Negate flag forwarded to FPAbsNeg for srcA</param>
/// <param name="negateB">Negate flag forwarded to FPAbsNeg for srcB</param>
/// <param name="boolFloat">True to store 1.0f/0 instead of the raw comparison result</param>
/// <param name="writeCC">Forwarded to the flag-setting helper</param>
private static void EmitFset(
    EmitterContext context,
    FComp cmpOp,
    BoolOp logicOp,
    Operand srcA,
    Operand srcB,
    int srcPred,
    bool srcPredInv,
    int rd,
    bool absoluteA,
    bool absoluteB,
    bool negateA,
    bool negateB,
    bool boolFloat,
    bool writeCC)
{
    Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA);
    Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB);

    Operand outcome = GetFPComparison(context, cmpOp, lhs, rhs);
    Operand predicate = GetPredicate(context, srcPred, srcPredInv);

    outcome = GetPredLogicalOp(context, logicOp, outcome, predicate);

    if (boolFloat)
    {
        // Float mode: the register receives 1.0f when the condition holds, otherwise integer 0.
        outcome = context.ConditionalSelect(outcome, ConstF(1), Const(0));
    }

    context.Copy(GetDest(rd), outcome);

    // The flag helper must match the kind of value that was written.
    if (boolFloat)
    {
        SetFPZnFlags(context, outcome, writeCC);
    }
    else
    {
        SetZnFlags(context, outcome, writeCC, extended: false);
    }
}
/// <summary>
/// Shared FSETP implementation: compares two operands (after optional abs/neg) and writes two
/// predicates. The first receives the comparison merged with the source predicate, the second
/// receives the bitwise complement of the comparison merged with the same predicate.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cmpOp">Floating point comparison to perform</param>
/// <param name="logicOp">Logical operation that merges each result with the predicate</param>
/// <param name="srcA">First compared operand</param>
/// <param name="srcB">Second compared operand</param>
/// <param name="srcPred">Index of the source predicate register</param>
/// <param name="srcPredInv">True to invert the source predicate before use</param>
/// <param name="destPred">Index of the predicate register that receives the comparison</param>
/// <param name="destPredInv">Index of the predicate register that receives the complement</param>
/// <param name="absoluteA">Absolute-value flag forwarded to FPAbsNeg for srcA</param>
/// <param name="absoluteB">Absolute-value flag forwarded to FPAbsNeg for srcB</param>
/// <param name="negateA">Negate flag forwarded to FPAbsNeg for srcA</param>
/// <param name="negateB">Negate flag forwarded to FPAbsNeg for srcB</param>
/// <param name="writeCC">Accepted for signature parity; not read by this method</param>
private static void EmitFsetp(
    EmitterContext context,
    FComp cmpOp,
    BoolOp logicOp,
    Operand srcA,
    Operand srcB,
    int srcPred,
    bool srcPredInv,
    int destPred,
    int destPredInv,
    bool absoluteA,
    bool absoluteB,
    bool negateA,
    bool negateB,
    bool writeCC)
{
    Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA);
    Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB);

    Operand comparison = GetFPComparison(context, cmpOp, lhs, rhs);
    Operand complement = context.BitwiseNot(comparison);
    Operand predicate = GetPredicate(context, srcPred, srcPredInv);

    Operand first = GetPredLogicalOp(context, logicOp, comparison, predicate);
    Operand second = GetPredLogicalOp(context, logicOp, complement, predicate);

    context.Copy(Register(destPred, RegisterType.Predicate), first);
    context.Copy(Register(destPredInv, RegisterType.Predicate), second);
}
/// <summary>
/// Shared HSET2 implementation: compares two pairs of values lane by lane, merges each lane
/// with a source predicate and packs both lane results into the destination register.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cmpOp">Floating point comparison applied to each lane</param>
/// <param name="logicOp">Logical operation that merges each lane with the predicate</param>
/// <param name="srcA">First operand pair (index 0 and 1 are used)</param>
/// <param name="srcB">Second operand pair (index 0 and 1 are used)</param>
/// <param name="srcPred">Index of the source predicate register</param>
/// <param name="srcPredInv">True to invert the source predicate before use</param>
/// <param name="rd">Index of the destination general purpose register</param>
/// <param name="boolFloat">True to pack 1.0/0 half values instead of 16-bit masks</param>
private static void EmitHset2(
    EmitterContext context,
    FComp cmpOp,
    BoolOp logicOp,
    Operand[] srcA,
    Operand[] srcB,
    int srcPred,
    bool srcPredInv,
    int rd,
    bool boolFloat)
{
    Operand lane0 = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
    Operand lane1 = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);

    Operand predicate = GetPredicate(context, srcPred, srcPredInv);

    lane0 = GetPredLogicalOp(context, logicOp, lane0, predicate);
    lane1 = GetPredLogicalOp(context, logicOp, lane1, predicate);

    if (!boolFloat)
    {
        // Integer mode: lane 0 occupies the low 16 bits, lane 1 is shifted into the high 16 bits.
        Operand lowBits = context.BitwiseAnd(lane0, Const(0xffff));
        Operand highBits = context.ShiftLeft(lane1, Const(16));
        Operand combined = context.BitwiseOr(lowBits, highBits);

        context.Copy(GetDest(rd), combined);

        return;
    }

    // Float mode: each lane becomes 1.0f or 0 and the pair is packed as two halves.
    lane0 = context.ConditionalSelect(lane0, ConstF(1), Const(0));
    lane1 = context.ConditionalSelect(lane1, ConstF(1), Const(0));

    context.Copy(GetDest(rd), context.PackHalf2x16(lane0, lane1));
}
/// <summary>
/// Shared HSETP2 implementation: compares two pairs of values lane by lane and writes the
/// results, merged with a source predicate, into two predicate registers.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cmpOp">Floating point comparison applied to each lane</param>
/// <param name="logicOp">Logical operation that merges each result with the predicate</param>
/// <param name="srcA">First operand pair (index 0 and 1 are used)</param>
/// <param name="srcB">Second operand pair (index 0 and 1 are used)</param>
/// <param name="srcPred">Index of the source predicate register</param>
/// <param name="srcPredInv">True to invert the source predicate before use</param>
/// <param name="destPred">Index of the first destination predicate register</param>
/// <param name="destPredInv">Index of the second destination predicate register</param>
/// <param name="hAnd">
/// True to AND both lane results into the first predicate and use its complement for the second;
/// false to write lane 0 and lane 1 results separately
/// </param>
private static void EmitHsetp2(
    EmitterContext context,
    FComp cmpOp,
    BoolOp logicOp,
    Operand[] srcA,
    Operand[] srcB,
    int srcPred,
    bool srcPredInv,
    int destPred,
    int destPredInv,
    bool hAnd)
{
    Operand first = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
    Operand second = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);

    if (hAnd)
    {
        Operand bothLanes = context.BitwiseAnd(first, second);

        first = bothLanes;
        second = context.BitwiseNot(bothLanes);
    }

    Operand predicate = GetPredicate(context, srcPred, srcPredInv);

    first = GetPredLogicalOp(context, logicOp, first, predicate);
    second = GetPredLogicalOp(context, logicOp, second, predicate);

    context.Copy(Register(destPred, RegisterType.Predicate), first);
    context.Copy(Register(destPredInv, RegisterType.Predicate), second);
}
/// <summary>
/// Builds the operand that holds the result of a floating point comparison.
/// </summary>
/// <remarks>
/// T and F yield constants. Nan and Num test whether either input is NaN (Num is the negation).
/// Every other condition emits an FP32 compare; when the Nan bit is set in the condition,
/// the result is additionally ORed with the NaN test of each input.
/// </remarks>
/// <param name="context">Emitter context</param>
/// <param name="cond">Comparison condition</param>
/// <param name="srcA">First compared operand</param>
/// <param name="srcB">Second compared operand</param>
/// <returns>Operand holding the comparison result</returns>
/// <exception cref="ArgumentException">The condition does not map to a known comparison</exception>
private static Operand GetFPComparison(EmitterContext context, FComp cond, Operand srcA, Operand srcB)
{
    if (cond == FComp.T)
    {
        return Const(IrConsts.True);
    }

    if (cond == FComp.F)
    {
        return Const(IrConsts.False);
    }

    if (cond == FComp.Nan || cond == FComp.Num)
    {
        Operand eitherIsNan = context.BitwiseOr(context.IsNan(srcA), context.IsNan(srcB));

        return cond == FComp.Num ? context.BitwiseNot(eitherIsNan) : eitherIsNan;
    }

    // Strip the Nan bit to find the base comparison.
    Instruction compare = (cond & ~FComp.Nan) switch
    {
        FComp.Lt => Instruction.CompareLess,
        FComp.Eq => Instruction.CompareEqual,
        FComp.Le => Instruction.CompareLessOrEqual,
        FComp.Gt => Instruction.CompareGreater,
        FComp.Ne => Instruction.CompareNotEqual,
        FComp.Ge => Instruction.CompareGreaterOrEqual,
        _ => throw new ArgumentException($"Unexpected condition \"{cond}\".")
    };

    Operand result = context.Add(compare | Instruction.FP32, Local(), srcA, srcB);

    if ((cond & FComp.Nan) == 0)
    {
        return result;
    }

    // Nan bit set: the condition also holds when either input is NaN.
    result = context.BitwiseOr(result, context.IsNan(srcA));

    return context.BitwiseOr(result, context.IsNan(srcB));
}
}
}

View file

@ -0,0 +1,70 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
    /// <summary>
    /// Emits FMNMX with both operands read from registers.
    /// </summary>
    /// <param name="context">Emitter context that provides the current instruction</param>
    public static void FmnmxR(EmitterContext context)
    {
        InstFmnmxR inst = context.GetOp<InstFmnmxR>();

        Operand lhs = GetSrcReg(context, inst.SrcA);
        Operand rhs = GetSrcReg(context, inst.SrcB);
        Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

        EmitFmnmx(
            context,
            lhs,
            rhs,
            selector,
            inst.Dest,
            inst.AbsA,
            inst.AbsB,
            inst.NegA,
            inst.NegB,
            inst.WriteCC);
    }

    /// <summary>
    /// Emits FMNMX with the first operand read from a register and the second built from
    /// the 20-bit immediate (expanded through Imm20ToFloat).
    /// </summary>
    /// <param name="context">Emitter context that provides the current instruction</param>
    public static void FmnmxI(EmitterContext context)
    {
        InstFmnmxI inst = context.GetOp<InstFmnmxI>();

        Operand lhs = GetSrcReg(context, inst.SrcA);
        Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
        Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

        EmitFmnmx(
            context,
            lhs,
            rhs,
            selector,
            inst.Dest,
            inst.AbsA,
            inst.AbsB,
            inst.NegA,
            inst.NegB,
            inst.WriteCC);
    }

    /// <summary>
    /// Emits FMNMX with the first operand read from a register and the second from a constant buffer.
    /// </summary>
    /// <param name="context">Emitter context that provides the current instruction</param>
    public static void FmnmxC(EmitterContext context)
    {
        InstFmnmxC inst = context.GetOp<InstFmnmxC>();

        Operand lhs = GetSrcReg(context, inst.SrcA);
        Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
        Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

        EmitFmnmx(
            context,
            lhs,
            rhs,
            selector,
            inst.Dest,
            inst.AbsA,
            inst.AbsB,
            inst.NegA,
            inst.NegB,
            inst.WriteCC);
    }

    /// <summary>
    /// Shared FMNMX implementation: computes both the minimum and the maximum of the two operands
    /// (after optional abs/neg) and stores the minimum when the predicate is true, the maximum otherwise.
    /// </summary>
    /// <param name="context">Emitter context</param>
    /// <param name="srcA">First operand</param>
    /// <param name="srcB">Second operand</param>
    /// <param name="srcPred">Predicate operand that picks minimum (true) or maximum (false)</param>
    /// <param name="rd">Index of the destination general purpose register</param>
    /// <param name="absoluteA">Absolute-value flag forwarded to FPAbsNeg for srcA</param>
    /// <param name="absoluteB">Absolute-value flag forwarded to FPAbsNeg for srcB</param>
    /// <param name="negateA">Negate flag forwarded to FPAbsNeg for srcA</param>
    /// <param name="negateB">Negate flag forwarded to FPAbsNeg for srcB</param>
    /// <param name="writeCC">Forwarded to the flag-setting helper</param>
    private static void EmitFmnmx(
        EmitterContext context,
        Operand srcA,
        Operand srcB,
        Operand srcPred,
        int rd,
        bool absoluteA,
        bool absoluteB,
        bool negateA,
        bool negateB,
        bool writeCC)
    {
        Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA);
        Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB);

        Operand minimum = context.FPMinimum(lhs, rhs);
        Operand maximum = context.FPMaximum(lhs, rhs);
        Operand selected = context.ConditionalSelect(srcPred, minimum, maximum);

        Operand destination = GetDest(rd);

        context.Copy(destination, selected);

        SetFPZnFlags(context, destination, writeCC);
    }
}
}

View file

@ -13,32 +13,38 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
public static void Bra(EmitterContext context)
{
EmitBranch(context, context.CurrBlock.Branch.Address);
InstBra op = context.GetOp<InstBra>();
EmitBranch(context, context.CurrBlock.Successors[^1].Address);
}
public static void Brk(EmitterContext context)
{
InstBrk op = context.GetOp<InstBrk>();
EmitBrkOrSync(context);
}
public static void Brx(EmitterContext context)
{
OpCodeBranchIndir op = (OpCodeBranchIndir)context.CurrOp;
InstBrx op = context.GetOp<InstBrx>();
InstOp currOp = context.CurrOp;
int startIndex = context.CurrBlock.HasNext() ? 1 : 0;
if (op.PossibleTargets.Count == 0)
if (context.CurrBlock.Successors.Count <= startIndex)
{
context.Config.GpuAccessor.Log($"Failed to find targets for BRX instruction at 0x{op.Address:X}.");
context.Config.GpuAccessor.Log($"Failed to find targets for BRX instruction at 0x{currOp.Address:X}.");
return;
}
int offset = (int)op.Address + 8 + op.Offset;
int offset = (int)currOp.GetAbsoluteAddress();
Operand address = context.IAdd(Register(op.Ra), Const(offset));
Operand address = context.IAdd(Register(op.SrcA, RegisterType.Gpr), Const(offset));
// Sorting the target addresses in descending order improves the code,
// since it will always check the most distant targets first, then the
// near ones. This can be easily transformed into if/else statements.
IOrderedEnumerable<Block> sortedTargets = op.PossibleTargets.OrderByDescending(x => x.Address);
var sortedTargets = context.CurrBlock.Successors.Skip(startIndex).OrderByDescending(x => x.Address);
Block lastTarget = sortedTargets.LastOrDefault();
@ -59,28 +65,24 @@ namespace Ryujinx.Graphics.Shader.Instructions
public static void Cal(EmitterContext context)
{
OpCodeBranch op = (OpCodeBranch)context.CurrOp;
InstCal op = context.GetOp<InstCal>();
context.Call(context.GetFunctionId(op.GetAbsoluteAddress()), false);
}
public static void Depbar(EmitterContext context)
{
context.Call(context.GetFunctionId(context.CurrOp.GetAbsoluteAddress()), false);
}
public static void Exit(EmitterContext context)
{
InstExit op = context.GetOp<InstExit>();
if (context.IsNonMain)
{
context.Config.GpuAccessor.Log("Invalid exit on non-main function.");
return;
}
OpCodeExit op = (OpCodeExit)context.CurrOp;
// TODO: Figure out how this is supposed to work in the
// presence of other condition codes.
if (op.Condition == Condition.Always)
if (op.Ccc == Ccc.T)
{
context.Return();
}
@ -88,20 +90,22 @@ namespace Ryujinx.Graphics.Shader.Instructions
public static void Kil(EmitterContext context)
{
context.Discard();
}
InstKil op = context.GetOp<InstKil>();
public static void Nop(EmitterContext context)
{
context.Discard();
}
public static void Pbk(EmitterContext context)
{
InstPbk op = context.GetOp<InstPbk>();
EmitPbkOrSsy(context);
}
public static void Ret(EmitterContext context)
{
InstRet op = context.GetOp<InstRet>();
if (context.IsNonMain)
{
context.Return();
@ -114,63 +118,62 @@ namespace Ryujinx.Graphics.Shader.Instructions
public static void Ssy(EmitterContext context)
{
InstSsy op = context.GetOp<InstSsy>();
EmitPbkOrSsy(context);
}
public static void Sync(EmitterContext context)
{
InstSync op = context.GetOp<InstSync>();
EmitBrkOrSync(context);
}
private static void EmitPbkOrSsy(EmitterContext context)
{
OpCodePush op = (OpCodePush)context.CurrOp;
var consumers = context.CurrBlock.PushOpCodes.First(x => x.Op.Address == context.CurrOp.Address).Consumers;
foreach (KeyValuePair<OpCodeBranchPop, Operand> kv in op.PopOps)
foreach (KeyValuePair<Block, Operand> kv in consumers)
{
OpCodeBranchPop opSync = kv.Key;
Block consumerBlock = kv.Key;
Operand local = kv.Value;
int pushOpIndex = opSync.Targets[op];
int id = consumerBlock.SyncTargets[context.CurrOp.Address].PushOpId;
context.Copy(local, Const(pushOpIndex));
context.Copy(local, Const(id));
}
}
private static void EmitBrkOrSync(EmitterContext context)
{
OpCodeBranchPop op = (OpCodeBranchPop)context.CurrOp;
var targets = context.CurrBlock.SyncTargets;
if (op.Targets.Count == 1)
if (targets.Count == 1)
{
// If we have only one target, then the SSY/PBK is basically
// a branch, we can produce better codegen for this case.
OpCodePush pushOp = op.Targets.Keys.First();
EmitBranch(context, pushOp.GetAbsoluteAddress());
EmitBranch(context, targets.Values.First().PushOpInfo.Op.GetAbsoluteAddress());
}
else
{
// TODO: Support CC here aswell (condition).
foreach (KeyValuePair<OpCodePush, int> kv in op.Targets)
foreach (SyncTarget target in targets.Values)
{
OpCodePush pushOp = kv.Key;
PushOpInfo pushOpInfo = target.PushOpInfo;
Operand label = context.GetLabel(pushOp.GetAbsoluteAddress());
Operand label = context.GetLabel(pushOpInfo.Op.GetAbsoluteAddress());
Operand local = pushOpInfo.Consumers[context.CurrBlock];
Operand local = pushOp.PopOps[op];
int pushOpIndex = kv.Value;
context.BranchIfTrue(label, context.ICompareEqual(local, Const(pushOpIndex)));
context.BranchIfTrue(label, context.ICompareEqual(local, Const(target.PushOpId)));
}
}
}
private static void EmitBranch(EmitterContext context, ulong address)
{
OpCode op = context.CurrOp;
InstOp op = context.CurrOp;
InstConditional opCond = new InstConditional(op.RawOpCode);
// If we're branching to the next instruction, then the branch
// is useless and we can ignore it.
@ -181,17 +184,17 @@ namespace Ryujinx.Graphics.Shader.Instructions
Operand label = context.GetLabel(address);
Operand pred = Register(op.Predicate);
Operand pred = Register(opCond.Pred, RegisterType.Predicate);
if (op is OpCodeConditional opCond && opCond.Condition != Condition.Always)
if (opCond.Ccc != Ccc.T)
{
Operand cond = GetCondition(context, opCond.Condition);
Operand cond = GetCondition(context, opCond.Ccc);
if (op.Predicate.IsPT)
if (opCond.Pred == RegisterConsts.PredicateTrueIndex)
{
pred = cond;
}
else if (op.InvertPredicate)
else if (opCond.PredInv)
{
pred = context.BitwiseAnd(context.BitwiseNot(pred), cond);
}
@ -202,11 +205,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.BranchIfTrue(label, pred);
}
else if (op.Predicate.IsPT)
else if (opCond.Pred == RegisterConsts.PredicateTrueIndex)
{
context.Branch(label);
}
else if (op.InvertPredicate)
else if (opCond.PredInv)
{
context.BranchIfFalse(label, pred);
}
@ -216,16 +219,16 @@ namespace Ryujinx.Graphics.Shader.Instructions
}
}
private static Operand GetCondition(EmitterContext context, Condition cond)
private static Operand GetCondition(EmitterContext context, Ccc cond)
{
// TODO: More condition codes, figure out how they work.
switch (cond)
{
case Condition.Equal:
case Condition.EqualUnordered:
case Ccc.Eq:
case Ccc.Equ:
return GetZF();
case Condition.NotEqual:
case Condition.NotEqualUnordered:
case Ccc.Ne:
case Ccc.Neu:
return context.BitwiseNot(GetZF());
}

View file

@ -2,6 +2,7 @@ using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
@ -29,184 +30,101 @@ namespace Ryujinx.Graphics.Shader.Instructions
return Register(3, RegisterType.Flag);
}
public static Operand GetDest(EmitterContext context)
public static Operand GetDest(int rd)
{
return Register(((IOpCodeRd)context.CurrOp).Rd);
return Register(rd, RegisterType.Gpr);
}
public static Operand GetDest2(EmitterContext context)
public static Operand GetDest2(int rd)
{
Register rd = ((IOpCodeRd)context.CurrOp).Rd;
return Register(rd.Index | 1, rd.Type);
return Register(rd | 1, RegisterType.Gpr);
}
public static Operand GetSrcA(EmitterContext context, bool isFP64 = false)
{
IOpCodeRa op = (IOpCodeRa)context.CurrOp;
if (isFP64)
{
return context.PackDouble2x32(Register(op.Ra.Index, op.Ra.Type), Register(op.Ra.Index | 1, op.Ra.Type));
}
else
{
return Register(op.Ra);
}
}
public static Operand GetSrcB(EmitterContext context, FPType floatType)
{
if (floatType == FPType.FP32)
{
return GetSrcB(context);
}
else if (floatType == FPType.FP16)
{
int h = context.CurrOp.RawOpCode.Extract(41, 1);
return GetHalfUnpacked(context, GetSrcB(context), FPHalfSwizzle.FP16)[h];
}
else if (floatType == FPType.FP64)
{
return GetSrcB(context, true);
}
throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
}
public static Operand GetSrcB(EmitterContext context, bool isFP64 = false)
public static Operand GetSrcCbuf(EmitterContext context, int cbufSlot, int cbufOffset, bool isFP64 = false)
{
if (isFP64)
{
switch (context.CurrOp)
{
case IOpCodeCbuf op:
return context.PackDouble2x32(
context.Config.CreateCbuf(op.Slot, op.Offset),
context.Config.CreateCbuf(op.Slot, op.Offset + 1));
case IOpCodeImmF op:
return context.FP32ConvertToFP64(ConstF(op.Immediate));
case IOpCodeReg op:
return context.PackDouble2x32(Register(op.Rb.Index, op.Rb.Type), Register(op.Rb.Index | 1, op.Rb.Type));
case IOpCodeRegCbuf op:
return context.PackDouble2x32(Register(op.Rc.Index, op.Rc.Type), Register(op.Rc.Index | 1, op.Rc.Type));
}
return context.PackDouble2x32(
context.Config.CreateCbuf(cbufSlot, cbufOffset),
context.Config.CreateCbuf(cbufSlot, cbufOffset + 1));
}
else
{
switch (context.CurrOp)
{
case IOpCodeCbuf op:
return context.Config.CreateCbuf(op.Slot, op.Offset);
case IOpCodeImm op:
return Const(op.Immediate);
case IOpCodeImmF op:
return ConstF(op.Immediate);
case IOpCodeReg op:
return Register(op.Rb);
case IOpCodeRegCbuf op:
return Register(op.Rc);
}
return context.Config.CreateCbuf(cbufSlot, cbufOffset);
}
throw new InvalidOperationException($"Unexpected opcode type \"{context.CurrOp.GetType().Name}\".");
}
public static Operand GetSrcC(EmitterContext context, bool isFP64 = false)
public static Operand GetSrcImm(EmitterContext context, int imm, bool isFP64 = false)
{
if (isFP64)
{
switch (context.CurrOp)
{
case IOpCodeRegCbuf op:
return context.PackDouble2x32(
context.Config.CreateCbuf(op.Slot, op.Offset),
context.Config.CreateCbuf(op.Slot, op.Offset + 1));
case IOpCodeRc op:
return context.PackDouble2x32(Register(op.Rc.Index, op.Rc.Type), Register(op.Rc.Index | 1, op.Rc.Type));
}
return context.FP32ConvertToFP64(Const(imm));
}
else
{
switch (context.CurrOp)
{
case IOpCodeRegCbuf op:
return context.Config.CreateCbuf(op.Slot, op.Offset);
case IOpCodeRc op:
return Register(op.Rc);
}
return Const(imm);
}
throw new InvalidOperationException($"Unexpected opcode type \"{context.CurrOp.GetType().Name}\".");
}
public static Operand[] GetHalfSrcA(EmitterContext context, bool isAdd = false)
public static Operand GetSrcReg(EmitterContext context, int reg, bool isFP64 = false)
{
OpCode op = context.CurrOp;
bool absoluteA = false, negateA = false;
if (op is OpCodeAluImm32 && isAdd)
if (isFP64)
{
negateA = op.RawOpCode.Extract(56);
return context.PackDouble2x32(Register(reg, RegisterType.Gpr), Register(reg | 1, RegisterType.Gpr));
}
else if (isAdd || op is IOpCodeCbuf || op is IOpCodeImm)
else
{
negateA = op.RawOpCode.Extract(43);
absoluteA = op.RawOpCode.Extract(44);
return Register(reg, RegisterType.Gpr);
}
else if (op is IOpCodeReg)
{
absoluteA = op.RawOpCode.Extract(44);
}
FPHalfSwizzle swizzle = (FPHalfSwizzle)op.RawOpCode.Extract(47, 2);
Operand[] operands = GetHalfUnpacked(context, GetSrcA(context), swizzle);
return FPAbsNeg(context, operands, absoluteA, negateA);
}
public static Operand[] GetHalfSrcB(EmitterContext context, bool isMul = false)
public static Operand[] GetHalfSrc(
EmitterContext context,
HalfSwizzle swizzle,
int ra,
bool negate,
bool absolute)
{
OpCode op = context.CurrOp;
Operand[] operands = GetHalfUnpacked(context, GetSrcReg(context, ra), swizzle);
FPHalfSwizzle swizzle = FPHalfSwizzle.FP16;
return FPAbsNeg(context, operands, absolute, negate);
}
bool absoluteB = false, negateB = false;
public static Operand[] GetHalfSrc(
EmitterContext context,
HalfSwizzle swizzle,
int cbufSlot,
int cbufOffset,
bool negate,
bool absolute)
{
Operand[] operands = GetHalfUnpacked(context, GetSrcCbuf(context, cbufSlot, cbufOffset), swizzle);
if (op is IOpCodeReg)
return FPAbsNeg(context, operands, absolute, negate);
}
public static Operand[] GetHalfSrc(EmitterContext context, int immH0, int immH1)
{
ushort low = (ushort)(immH0 << 6);
ushort high = (ushort)(immH1 << 6);
return new Operand[]
{
swizzle = (FPHalfSwizzle)op.RawOpCode.Extract(28, 2);
ConstF((float)Unsafe.As<ushort, Half>(ref low)),
ConstF((float)Unsafe.As<ushort, Half>(ref high))
};
}
absoluteB = op.RawOpCode.Extract(30);
negateB = op.RawOpCode.Extract(31);
}
else if (op is IOpCodeCbuf)
public static Operand[] GetHalfSrc(EmitterContext context, int imm32)
{
ushort low = (ushort)imm32;
ushort high = (ushort)(imm32 >> 16);
return new Operand[]
{
swizzle = FPHalfSwizzle.FP32;
absoluteB = op.RawOpCode.Extract(54);
if (!isMul)
{
negateB = op.RawOpCode.Extract(56);
}
}
Operand[] operands = GetHalfUnpacked(context, GetSrcB(context), swizzle);
return FPAbsNeg(context, operands, absoluteB, negateB);
ConstF((float)Unsafe.As<ushort, Half>(ref low)),
ConstF((float)Unsafe.As<ushort, Half>(ref high))
};
}
public static Operand[] FPAbsNeg(EmitterContext context, Operand[] operands, bool abs, bool neg)
@ -219,27 +137,27 @@ namespace Ryujinx.Graphics.Shader.Instructions
return operands;
}
public static Operand[] GetHalfUnpacked(EmitterContext context, Operand src, FPHalfSwizzle swizzle)
public static Operand[] GetHalfUnpacked(EmitterContext context, Operand src, HalfSwizzle swizzle)
{
switch (swizzle)
{
case FPHalfSwizzle.FP16:
case HalfSwizzle.F16:
return new Operand[]
{
context.UnpackHalf2x16Low (src),
context.UnpackHalf2x16High(src)
};
case FPHalfSwizzle.FP32: return new Operand[] { src, src };
case HalfSwizzle.F32: return new Operand[] { src, src };
case FPHalfSwizzle.DupH0:
case HalfSwizzle.H0H0:
return new Operand[]
{
context.UnpackHalf2x16Low(src),
context.UnpackHalf2x16Low(src)
};
case FPHalfSwizzle.DupH1:
case HalfSwizzle.H1H1:
return new Operand[]
{
context.UnpackHalf2x16High(src),
@ -250,33 +168,24 @@ namespace Ryujinx.Graphics.Shader.Instructions
throw new ArgumentException($"Invalid swizzle \"{swizzle}\".");
}
public static Operand GetHalfPacked(EmitterContext context, Operand[] results)
public static Operand GetHalfPacked(EmitterContext context, OFmt swizzle, Operand[] results, int rd)
{
OpCode op = context.CurrOp;
FPHalfSwizzle swizzle = FPHalfSwizzle.FP16;
if (!(op is OpCodeAluImm32))
{
swizzle = (FPHalfSwizzle)context.CurrOp.RawOpCode.Extract(49, 2);
}
switch (swizzle)
{
case FPHalfSwizzle.FP16: return context.PackHalf2x16(results[0], results[1]);
case OFmt.F16: return context.PackHalf2x16(results[0], results[1]);
case FPHalfSwizzle.FP32: return results[0];
case OFmt.F32: return results[0];
case FPHalfSwizzle.DupH0:
case OFmt.MrgH0:
{
Operand h1 = GetHalfDest(context, isHigh: true);
Operand h1 = GetHalfDest(context, rd, isHigh: true);
return context.PackHalf2x16(results[0], h1);
}
case FPHalfSwizzle.DupH1:
case OFmt.MrgH1:
{
Operand h0 = GetHalfDest(context, isHigh: false);
Operand h0 = GetHalfDest(context, rd, isHigh: false);
return context.PackHalf2x16(h0, results[1]);
}
@ -285,25 +194,23 @@ namespace Ryujinx.Graphics.Shader.Instructions
throw new ArgumentException($"Invalid swizzle \"{swizzle}\".");
}
public static Operand GetHalfDest(EmitterContext context, bool isHigh)
public static Operand GetHalfDest(EmitterContext context, int rd, bool isHigh)
{
if (isHigh)
{
return context.UnpackHalf2x16High(GetDest(context));
return context.UnpackHalf2x16High(GetDest(rd));
}
else
{
return context.UnpackHalf2x16Low(GetDest(context));
return context.UnpackHalf2x16Low(GetDest(rd));
}
}
public static Operand GetPredicate39(EmitterContext context)
public static Operand GetPredicate(EmitterContext context, int pred, bool not)
{
IOpCodePredicate39 op = (IOpCodePredicate39)context.CurrOp;
Operand local = Register(pred, RegisterType.Predicate);
Operand local = Register(op.Predicate39);
if (op.InvertP)
if (not)
{
local = context.BitwiseNot(local);
}
@ -311,6 +218,26 @@ namespace Ryujinx.Graphics.Shader.Instructions
return local;
}
/// <summary>
/// Reinterprets the low 16 bits of the value as a signed 16-bit integer and widens it to 32 bits.
/// </summary>
/// <param name="imm16">Value whose low 16 bits hold the immediate</param>
/// <returns>Sign-extended 32-bit value</returns>
public static int Imm16ToSInt(int imm16) => (short)imm16;
/// <summary>
/// Moves a 20-bit immediate into the upper 20 bits of a 32-bit word, leaving the low 12 bits zero.
/// </summary>
/// <remarks>
/// The result is still an integer bit pattern. NOTE(review): presumably it is meant to be read
/// as the raw bits of an FP32 value; confirm against the consumers of this helper.
/// </remarks>
/// <param name="imm20">20-bit immediate</param>
/// <returns>The immediate shifted left by 12 bits</returns>
public static int Imm20ToFloat(int imm20)
{
    const int LowBitsLeftZero = 32 - 20;

    return imm20 << LowBitsLeftZero;
}
/// <summary>
/// Sign-extends the low 20 bits of the value to a 32-bit signed integer.
/// </summary>
/// <param name="imm20">Value whose low 20 bits hold the immediate</param>
/// <returns>Sign-extended 32-bit value</returns>
public static int Imm20ToSInt(int imm20)
{
    const int UnusedHighBits = 32 - 20;

    // Push bit 19 into the sign position, then shift back arithmetically to replicate it.
    int alignedToTop = imm20 << UnusedHighBits;

    return alignedToTop >> UnusedHighBits;
}
/// <summary>
/// Sign-extends the low 24 bits of the value to a 32-bit signed integer.
/// </summary>
/// <param name="imm24">Value whose low 24 bits hold the immediate</param>
/// <returns>Sign-extended 32-bit value</returns>
public static int Imm24ToSInt(int imm24)
{
    const int UnusedHighBits = 32 - 24;

    // Push bit 23 into the sign position, then shift back arithmetically to replicate it.
    int alignedToTop = imm24 << UnusedHighBits;

    return alignedToTop >> UnusedHighBits;
}
public static Operand SignExtendTo32(EmitterContext context, Operand src, int srcBits)
{
return context.BitfieldExtractS32(src, Const(0), Const(srcBits));
@ -318,7 +245,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
public static Operand ZeroExtendTo32(EmitterContext context, Operand src, int srcBits)
{
int mask = (int)(0xffffffffu >> (32 - srcBits));
int mask = (int)(uint.MaxValue >> (32 - srcBits));
return context.BitwiseAnd(src, Const(mask));
}

View file

@ -0,0 +1,656 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits IADD with both operands read from registers.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void IaddR(EmitterContext context)
{
    InstIaddR inst = context.GetOp<InstIaddR>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcReg(context, inst.SrcB);

    EmitIadd(
        context,
        lhs,
        rhs,
        inst.Dest,
        inst.AvgMode,
        inst.X,
        inst.WriteCC);
}
/// <summary>
/// Emits IADD with the first operand read from a register and the second taken from the
/// sign-extended 20-bit immediate.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void IaddI(EmitterContext context)
{
    InstIaddI inst = context.GetOp<InstIaddI>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToSInt(inst.Imm20));

    EmitIadd(
        context,
        lhs,
        rhs,
        inst.Dest,
        inst.AvgMode,
        inst.X,
        inst.WriteCC);
}
/// <summary>
/// Emits IADD with the first operand read from a register and the second from a constant buffer.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void IaddC(EmitterContext context)
{
    InstIaddC inst = context.GetOp<InstIaddC>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitIadd(
        context,
        lhs,
        rhs,
        inst.Dest,
        inst.AvgMode,
        inst.X,
        inst.WriteCC);
}
/// <summary>
/// Emits IADD with the first operand read from a register and the second taken from the
/// 32-bit immediate as-is.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Iadd32i(EmitterContext context)
{
    InstIadd32i inst = context.GetOp<InstIadd32i>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, inst.Imm32);

    EmitIadd(
        context,
        lhs,
        rhs,
        inst.Dest,
        inst.AvgMode,
        inst.X,
        inst.WriteCC);
}
/// <summary>
/// Emits IADD3 with all three operands read from registers, forwarding the shift mode and
/// per-operand part selectors from the instruction.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Iadd3R(EmitterContext context)
{
    InstIadd3R inst = context.GetOp<InstIadd3R>();

    Operand first = GetSrcReg(context, inst.SrcA);
    Operand second = GetSrcReg(context, inst.SrcB);
    Operand third = GetSrcReg(context, inst.SrcC);

    EmitIadd3(
        context,
        inst.Lrs,
        first,
        second,
        third,
        inst.Apart,
        inst.Bpart,
        inst.Cpart,
        inst.Dest,
        inst.NegA,
        inst.NegB,
        inst.NegC);
}
/// <summary>
/// Emits IADD3 with the second operand taken from the sign-extended 20-bit immediate.
/// This form always passes Lrs.None and HalfSelect.B32 for every operand.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Iadd3I(EmitterContext context)
{
    InstIadd3I inst = context.GetOp<InstIadd3I>();

    Operand first = GetSrcReg(context, inst.SrcA);
    Operand second = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
    Operand third = GetSrcReg(context, inst.SrcC);

    EmitIadd3(
        context,
        Lrs.None,
        first,
        second,
        third,
        HalfSelect.B32,
        HalfSelect.B32,
        HalfSelect.B32,
        inst.Dest,
        inst.NegA,
        inst.NegB,
        inst.NegC);
}
/// <summary>
/// Emits IADD3 with the second operand read from a constant buffer.
/// This form always passes Lrs.None and HalfSelect.B32 for every operand.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Iadd3C(EmitterContext context)
{
    InstIadd3C inst = context.GetOp<InstIadd3C>();

    Operand first = GetSrcReg(context, inst.SrcA);
    Operand second = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
    Operand third = GetSrcReg(context, inst.SrcC);

    EmitIadd3(
        context,
        Lrs.None,
        first,
        second,
        third,
        HalfSelect.B32,
        HalfSelect.B32,
        HalfSelect.B32,
        inst.Dest,
        inst.NegA,
        inst.NegB,
        inst.NegC);
}
/// <summary>
/// Emits IMAD with all three operands read from registers.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void ImadR(EmitterContext context)
{
    InstImadR inst = context.GetOp<InstImadR>();

    Operand first = GetSrcReg(context, inst.SrcA);
    Operand second = GetSrcReg(context, inst.SrcB);
    Operand third = GetSrcReg(context, inst.SrcC);

    EmitImad(
        context,
        first,
        second,
        third,
        inst.Dest,
        inst.AvgMode,
        inst.ASigned,
        inst.BSigned,
        inst.Hilo);
}
/// <summary>
/// Emits IMAD with the second operand taken from the sign-extended 20-bit immediate.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void ImadI(EmitterContext context)
{
    InstImadI inst = context.GetOp<InstImadI>();

    Operand first = GetSrcReg(context, inst.SrcA);
    Operand second = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
    Operand third = GetSrcReg(context, inst.SrcC);

    EmitImad(
        context,
        first,
        second,
        third,
        inst.Dest,
        inst.AvgMode,
        inst.ASigned,
        inst.BSigned,
        inst.Hilo);
}
/// <summary>
/// Emits IMAD with the second operand read from a constant buffer.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void ImadC(EmitterContext context)
{
    InstImadC inst = context.GetOp<InstImadC>();

    Operand first = GetSrcReg(context, inst.SrcA);
    Operand second = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
    Operand third = GetSrcReg(context, inst.SrcC);

    EmitImad(
        context,
        first,
        second,
        third,
        inst.Dest,
        inst.AvgMode,
        inst.ASigned,
        inst.BSigned,
        inst.Hilo);
}
/// <summary>
/// Emits IMAD for the encoding where the second operand is read from the SrcC register
/// and the third operand is read from a constant buffer.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void ImadRc(EmitterContext context)
{
    InstImadRc inst = context.GetOp<InstImadRc>();

    Operand first = GetSrcReg(context, inst.SrcA);
    Operand second = GetSrcReg(context, inst.SrcC);
    Operand third = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitImad(
        context,
        first,
        second,
        third,
        inst.Dest,
        inst.AvgMode,
        inst.ASigned,
        inst.BSigned,
        inst.Hilo);
}
/// <summary>
/// Emits IMAD with the second operand taken from the 32-bit immediate.
/// The third operand is read from the destination register itself.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Imad32i(EmitterContext context)
{
    InstImad32i inst = context.GetOp<InstImad32i>();

    Operand first = GetSrcReg(context, inst.SrcA);
    Operand second = GetSrcImm(context, inst.Imm32);
    Operand third = GetSrcReg(context, inst.Dest);

    EmitImad(
        context,
        first,
        second,
        third,
        inst.Dest,
        inst.AvgMode,
        inst.ASigned,
        inst.BSigned,
        inst.Hilo);
}
/// <summary>
/// Emits ISCADD with both operands read from registers.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void IscaddR(EmitterContext context)
{
    InstIscaddR inst = context.GetOp<InstIscaddR>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcReg(context, inst.SrcB);

    EmitIscadd(
        context,
        lhs,
        rhs,
        inst.Dest,
        inst.Imm5,
        inst.AvgMode,
        inst.WriteCC);
}
/// <summary>
/// Emits ISCADD with the first operand read from a register and the second taken from the
/// sign-extended 20-bit immediate.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void IscaddI(EmitterContext context)
{
    InstIscaddI inst = context.GetOp<InstIscaddI>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToSInt(inst.Imm20));

    EmitIscadd(
        context,
        lhs,
        rhs,
        inst.Dest,
        inst.Imm5,
        inst.AvgMode,
        inst.WriteCC);
}
/// <summary>
/// Emits ISCADD with the first operand read from a register and the second from a constant buffer.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void IscaddC(EmitterContext context)
{
    InstIscaddC inst = context.GetOp<InstIscaddC>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitIscadd(
        context,
        lhs,
        rhs,
        inst.Dest,
        inst.Imm5,
        inst.AvgMode,
        inst.WriteCC);
}
/// <summary>
/// Emits ISCADD with the second operand taken from the 32-bit immediate.
/// This form always passes AvgMode.NoNeg.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void Iscadd32i(EmitterContext context)
{
    InstIscadd32i inst = context.GetOp<InstIscadd32i>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, inst.Imm32);

    EmitIscadd(
        context,
        lhs,
        rhs,
        inst.Dest,
        inst.Imm5,
        AvgMode.NoNeg,
        inst.WriteCC);
}
/// <summary>
/// Emits LEA with both operands read from registers.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void LeaR(EmitterContext context)
{
    InstLeaR inst = context.GetOp<InstLeaR>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcReg(context, inst.SrcB);

    EmitLea(
        context,
        lhs,
        rhs,
        inst.Dest,
        inst.NegA,
        inst.ImmU5);
}
/// <summary>
/// Emits LEA with the first operand read from a register and the second taken from the
/// sign-extended 20-bit immediate.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void LeaI(EmitterContext context)
{
    InstLeaI inst = context.GetOp<InstLeaI>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToSInt(inst.Imm20));

    EmitLea(
        context,
        lhs,
        rhs,
        inst.Dest,
        inst.NegA,
        inst.ImmU5);
}
/// <summary>
/// Emits LEA with the first operand read from a register and the second from a constant buffer.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void LeaC(EmitterContext context)
{
    InstLeaC inst = context.GetOp<InstLeaC>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitLea(
        context,
        lhs,
        rhs,
        inst.Dest,
        inst.NegA,
        inst.ImmU5);
}
/// <summary>
/// Emits LEA.HI with all three operands read from registers.
/// </summary>
/// <param name="context">Emitter context that provides the current instruction</param>
public static void LeaHiR(EmitterContext context)
{
    InstLeaHiR inst = context.GetOp<InstLeaHiR>();

    Operand first = GetSrcReg(context, inst.SrcA);
    Operand second = GetSrcReg(context, inst.SrcB);
    Operand third = GetSrcReg(context, inst.SrcC);

    EmitLeaHi(
        context,
        first,
        second,
        third,
        inst.Dest,
        inst.NegA,
        inst.ImmU5);
}
/// <summary>
/// Emits LEA.HI for the encoding whose second source is read through
/// <c>GetSrcCbuf</c>; the first and third sources are registers.
/// </summary>
public static void LeaHiC(EmitterContext context)
{
    InstLeaHiC op = context.GetOp<InstLeaHiC>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand cbufB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand regC = GetSrcReg(context, op.SrcC);

    EmitLeaHi(context, regA, cbufB, regC, op.Dest, op.NegA, op.ImmU5);
}
/// <summary>
/// Emits XMAD for the encoding whose three sources are all registers.
/// </summary>
public static void XmadR(EmitterContext context)
{
    InstXmadR op = context.GetOp<InstXmadR>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand regB = GetSrcReg(context, op.SrcB);
    Operand regC = GetSrcReg(context, op.SrcC);

    EmitXmad(
        context,
        op.XmadCop,
        regA,
        regB,
        regC,
        op.Dest,
        op.ASigned,
        op.BSigned,
        op.HiloA,
        op.HiloB,
        op.Psl,
        op.Mrg,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits XMAD for the encoding whose second source is the 16-bit immediate field.
/// The "high half of B" selector is always passed as false for this form.
/// </summary>
public static void XmadI(EmitterContext context)
{
    InstXmadI op = context.GetOp<InstXmadI>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand immB = GetSrcImm(context, op.Imm16);
    Operand regC = GetSrcReg(context, op.SrcC);

    EmitXmad(
        context,
        op.XmadCop,
        regA,
        immB,
        regC,
        op.Dest,
        op.ASigned,
        op.BSigned,
        op.HiloA,
        false,
        op.Psl,
        op.Mrg,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits XMAD for the encoding whose second source is read through
/// <c>GetSrcCbuf</c>; the first and third sources are registers.
/// </summary>
public static void XmadC(EmitterContext context)
{
    InstXmadC op = context.GetOp<InstXmadC>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand cbufB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand regC = GetSrcReg(context, op.SrcC);

    EmitXmad(
        context,
        op.XmadCop,
        regA,
        cbufB,
        regC,
        op.Dest,
        op.ASigned,
        op.BSigned,
        op.HiloA,
        op.HiloB,
        op.Psl,
        op.Mrg,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits XMAD for the encoding where the multiplier comes from the register in
/// the <c>SrcC</c> field and the accumulator is read through <c>GetSrcCbuf</c>.
/// Product-shift-left and merge are always passed as false for this form.
/// </summary>
public static void XmadRc(EmitterContext context)
{
    InstXmadRc op = context.GetOp<InstXmadRc>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand regB = GetSrcReg(context, op.SrcC);
    Operand cbufC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitXmad(
        context,
        op.XmadCop,
        regA,
        regB,
        cbufC,
        op.Dest,
        op.ASigned,
        op.BSigned,
        op.HiloA,
        op.HiloB,
        false,
        false,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Shared emitter for integer add: optionally negates either source, adds them,
/// optionally adds the carry flag, updates flags and writes register <paramref name="rd"/>.
/// </summary>
/// <param name="context">Emitter the operations are appended to.</param>
/// <param name="srcA">First addend.</param>
/// <param name="srcB">Second addend.</param>
/// <param name="rd">Destination register index.</param>
/// <param name="avgMode">Selects which source, if any, is negated.</param>
/// <param name="extended">When true, bit 0 of the carry flag is added to the sum.</param>
/// <param name="writeCC">When true, condition flags are updated.</param>
private static void EmitIadd(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    int rd,
    AvgMode avgMode,
    bool extended,
    bool writeCC)
{
    bool negateA = avgMode == AvgMode.NegA;
    bool negateB = avgMode == AvgMode.NegB;

    srcA = context.INegate(srcA, negateA);
    srcB = context.INegate(srcB, negateB);

    Operand sum = context.IAdd(srcA, srcB);

    if (extended)
    {
        Operand carryIn = context.BitwiseAnd(GetCF(), Const(1));

        sum = context.IAdd(sum, carryIn);
    }

    SetIaddFlags(context, sum, srcA, srcB, writeCC, extended);

    // TODO: SAT.

    context.Copy(GetDest(rd), sum);
}
/// <summary>
/// Shared emitter for the three-input integer add. Each source may be narrowed to
/// one of its 16-bit halves and negated; the sum of A and B may be shifted by 16
/// bits before C is added. The result is written to register <paramref name="rd"/>.
/// </summary>
/// <param name="context">Emitter the operations are appended to.</param>
/// <param name="mode">Shift applied to (A + B) before C is added.</param>
/// <param name="srcA">First addend.</param>
/// <param name="srcB">Second addend.</param>
/// <param name="srcC">Third addend.</param>
/// <param name="partA">Half selection for <paramref name="srcA"/>.</param>
/// <param name="partB">Half selection for <paramref name="srcB"/>.</param>
/// <param name="partC">Half selection for <paramref name="srcC"/>.</param>
/// <param name="rd">Destination register index.</param>
/// <param name="negateA">Negate A after half selection.</param>
/// <param name="negateB">Negate B after half selection.</param>
/// <param name="negateC">Negate C after half selection.</param>
private static void EmitIadd3(
    EmitterContext context,
    Lrs mode,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    HalfSelect partA,
    HalfSelect partB,
    HalfSelect partC,
    int rd,
    bool negateA,
    bool negateB,
    bool negateC)
{
    // Zero-extends the selected 16-bit half; an unknown selector is logged and
    // the source is passed through unchanged.
    Operand Extend(Operand src, HalfSelect part)
    {
        switch (part)
        {
            case HalfSelect.B32:
                return src;
            case HalfSelect.H0:
                return context.BitwiseAnd(src, Const(0xffff));
            case HalfSelect.H1:
                return context.ShiftRightU32(src, Const(16));
            default:
                context.Config.GpuAccessor.Log($"Iadd3 has invalid component selection {part}.");
                return src;
        }
    }

    srcA = context.INegate(Extend(srcA, partA), negateA);
    srcB = context.INegate(Extend(srcB, partB), negateB);
    srcC = context.INegate(Extend(srcC, partC), negateC);

    Operand res = context.IAdd(srcA, srcB);

    switch (mode)
    {
        case Lrs.None:
            break;
        case Lrs.LeftShift:
            res = context.ShiftLeft(res, Const(16));
            break;
        case Lrs.RightShift:
            res = context.ShiftRightU32(res, Const(16));
            break;
        default:
            // Previously ignored silently; report it like the invalid half selection above.
            context.Config.GpuAccessor.Log($"Iadd3 has invalid shift mode {mode}.");
            break;
    }

    res = context.IAdd(res, srcC);

    context.Copy(GetDest(rd), res);

    // TODO: CC, X, corner cases.
}
/// <summary>
/// Shared emitter for integer multiply-add: computes A * B (low or high 32 bits)
/// plus C and writes it to register <paramref name="rd"/>.
/// </summary>
/// <param name="context">Emitter the operations are appended to.</param>
/// <param name="srcA">Multiplicand.</param>
/// <param name="srcB">Multiplier; negated when <paramref name="avgMode"/> is <c>NegA</c>.</param>
/// <param name="srcC">Addend; negated when <paramref name="avgMode"/> is <c>NegB</c>.</param>
/// <param name="rd">Destination register index.</param>
/// <param name="avgMode">Negation selector.</param>
/// <param name="signedA">Treat A as signed (only affects the high product).</param>
/// <param name="signedB">Treat B as signed (only affects the high product).</param>
/// <param name="high">Select the high 32 bits of the product instead of the low ones.</param>
public static void EmitImad(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    AvgMode avgMode,
    bool signedA,
    bool signedB,
    bool high)
{
    srcB = context.INegate(srcB, avgMode == AvgMode.NegA);
    srcC = context.INegate(srcC, avgMode == AvgMode.NegB);

    Operand product;

    if (!high)
    {
        product = context.IMultiply(srcA, srcB);
    }
    else if (signedA && signedB)
    {
        product = context.MultiplyHighS32(srcA, srcB);
    }
    else
    {
        // Start from the unsigned high product and correct it for whichever
        // single operand is signed.
        product = context.MultiplyHighU32(srcA, srcB);

        if (signedA)
        {
            Operand signA = context.ShiftRightS32(srcA, Const(31));
            Operand fixup = context.IMultiply(srcB, signA);

            product = context.IAdd(product, fixup);
        }
        else if (signedB)
        {
            Operand signB = context.ShiftRightS32(srcB, Const(31));
            Operand fixup = context.IMultiply(srcA, signB);

            product = context.IAdd(product, fixup);
        }
    }

    Operand sum = context.IAdd(product, srcC);

    // TODO: CC, X, SAT, and more?

    context.Copy(GetDest(rd), sum);
}
/// <summary>
/// Shared emitter for scaled add: shifts A left by <paramref name="shift"/>,
/// optionally negates either operand, adds them, updates flags and writes
/// register <paramref name="rd"/>.
/// </summary>
private static void EmitIscadd(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    int rd,
    int shift,
    AvgMode avgMode,
    bool writeCC)
{
    Operand scaledA = context.ShiftLeft(srcA, Const(shift));

    scaledA = context.INegate(scaledA, avgMode == AvgMode.NegA);
    srcB = context.INegate(srcB, avgMode == AvgMode.NegB);

    Operand sum = context.IAdd(scaledA, srcB);

    SetIaddFlags(context, sum, scaledA, srcB, writeCC, false);

    context.Copy(GetDest(rd), sum);
}
/// <summary>
/// Shared emitter for LEA: computes (A &lt;&lt; shift), optionally negated, plus B
/// and writes it to register <paramref name="rd"/>.
/// </summary>
public static void EmitLea(EmitterContext context, Operand srcA, Operand srcB, int rd, bool negateA, int shift)
{
    Operand scaledA = context.ShiftLeft(srcA, Const(shift));

    scaledA = context.INegate(scaledA, negateA);

    Operand sum = context.IAdd(scaledA, srcB);

    context.Copy(GetDest(rd), sum);

    // TODO: CC, X.
}
/// <summary>
/// Shared emitter for LEA.HI: treats (C:A) as a 64-bit value, shifts it left by
/// <paramref name="shift"/>, optionally negates it, then adds B to the upper
/// 32 bits and writes that to register <paramref name="rd"/>.
/// </summary>
private static void EmitLeaHi(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    bool negateA,
    int shift)
{
    Operand lowWord = context.ShiftLeft(srcA, Const(shift));

    // Bits shifted out of the low word; a shift of zero contributes nothing.
    Operand highWord;

    if (shift == 0)
    {
        highWord = Const(0);
    }
    else
    {
        highWord = context.ShiftRightU32(srcA, Const(32 - shift));
    }

    highWord = context.BitwiseOr(highWord, context.ShiftLeft(srcC, Const(shift)));

    if (negateA)
    {
        // 64-bit two's complement: invert both words, add one to the low
        // word and propagate its carry into the high word.
        lowWord = context.BitwiseNot(lowWord);
        highWord = context.BitwiseNot(highWord);

        lowWord = AddWithCarry(context, lowWord, Const(1), out Operand carryOut);
        highWord = context.IAdd(highWord, carryOut);
    }

    Operand sum = context.IAdd(highWord, srcB);

    context.Copy(GetDest(rd), sum);

    // TODO: CC, X.
}
/// <summary>
/// Overload taking the <see cref="XmadCop2"/> mode enum: maps it onto the
/// matching <see cref="XmadCop"/> value and forwards to the main emitter.
/// An unmapped mode is logged and nothing is emitted.
/// </summary>
public static void EmitXmad(
    EmitterContext context,
    XmadCop2 mode,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    bool signedA,
    bool signedB,
    bool highA,
    bool highB,
    bool productShiftLeft,
    bool merge,
    bool extended,
    bool writeCC)
{
    XmadCop? converted = mode switch
    {
        XmadCop2.Cfull => XmadCop.Cfull,
        XmadCop2.Clo => XmadCop.Clo,
        XmadCop2.Chi => XmadCop.Chi,
        XmadCop2.Csfu => XmadCop.Csfu,
        _ => (XmadCop?)null
    };

    if (!converted.HasValue)
    {
        context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");

        return;
    }

    EmitXmad(
        context,
        converted.Value,
        srcA,
        srcB,
        srcC,
        rd,
        signedA,
        signedB,
        highA,
        highB,
        productShiftLeft,
        merge,
        extended,
        writeCC);
}
/// <summary>
/// Shared emitter for XMAD: multiplies a 16-bit half of A by a 16-bit half of B,
/// adds the accumulator C (pre-processed according to <paramref name="mode"/>),
/// and writes the result to register <paramref name="rd"/>.
/// </summary>
/// <param name="context">Emitter the operations are appended to.</param>
/// <param name="mode">How the accumulator is prepared before the add.</param>
/// <param name="srcA">First multiplicand (full 32-bit register value).</param>
/// <param name="srcB">Second multiplicand (full 32-bit value).</param>
/// <param name="srcC">Accumulator.</param>
/// <param name="rd">Destination register index.</param>
/// <param name="signedA">Sign-extend the selected half of A.</param>
/// <param name="signedB">Sign-extend the selected half of B.</param>
/// <param name="highA">Use the upper 16 bits of A instead of the lower ones.</param>
/// <param name="highB">Use the upper 16 bits of B instead of the lower ones.</param>
/// <param name="productShiftLeft">Shift the product left by 16 before the add.</param>
/// <param name="merge">Replace the upper 16 bits of the result with the low 16 bits of the original B.</param>
/// <param name="extended">Also add the carry flag to the sum.</param>
/// <param name="writeCC">Update condition flags.</param>
public static void EmitXmad(
    EmitterContext context,
    XmadCop mode,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    bool signedA,
    bool signedB,
    bool highA,
    bool highB,
    bool productShiftLeft,
    bool merge,
    bool extended,
    bool writeCC)
{
    // The CBCC and merge paths need B as it was before half extraction.
    var srcBUnmodified = srcB;

    Operand Extend16To32(Operand src, bool high, bool signed)
    {
        if (signed && high)
        {
            return context.ShiftRightS32(src, Const(16));
        }
        else if (signed)
        {
            return context.BitfieldExtractS32(src, Const(0), Const(16));
        }
        else if (high)
        {
            return context.ShiftRightU32(src, Const(16));
        }
        else
        {
            return context.BitwiseAnd(src, Const(0xffff));
        }
    }

    srcA = Extend16To32(srcA, highA, signedA);
    srcB = Extend16To32(srcB, highB, signedB);

    Operand res = context.IMultiply(srcA, srcB);

    if (productShiftLeft)
    {
        res = context.ShiftLeft(res, Const(16));
    }

    switch (mode)
    {
        case XmadCop.Cfull:
            break;

        case XmadCop.Clo:
            srcC = Extend16To32(srcC, high: false, signed: false);
            break;

        case XmadCop.Chi:
            srcC = Extend16To32(srcC, high: true, signed: false);
            break;

        case XmadCop.Cbcc:
            srcC = context.IAdd(srcC, context.ShiftLeft(srcBUnmodified, Const(16)));
            break;

        case XmadCop.Csfu:
            Operand signAdjustA = context.ShiftLeft(context.ShiftRightU32(srcA, Const(31)), Const(16));
            Operand signAdjustB = context.ShiftLeft(context.ShiftRightU32(srcB, Const(31)), Const(16));

            srcC = context.ISubtract(srcC, context.IAdd(signAdjustA, signAdjustB));
            break;

        default:
            context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
            return;
    }

    Operand product = res;

    // The accumulator is added in both modes. Previously the extended path
    // added only the carry and dropped srcC, which also disagreed with the
    // flag computation below (it assumes res = product + srcC + carry-in).
    res = context.IAdd(res, srcC);

    if (extended)
    {
        // Add with carry, same as EmitIadd.
        res = context.IAdd(res, context.BitwiseAnd(GetCF(), Const(1)));
    }

    SetIaddFlags(context, res, product, srcC, writeCC, extended);

    if (merge)
    {
        res = context.BitwiseAnd(res, Const(0xffff));
        res = context.BitwiseOr(res, context.ShiftLeft(srcBUnmodified, Const(16)));
    }

    context.Copy(GetDest(rd), res);
}
// Updates the condition flags after an integer add.
//   res      - result of the add.
//   srcA     - first addend, used as the reference for the carry test.
//   srcB     - second addend, only used by the overflow test.
//   setCC    - when false nothing is emitted.
//   extended - when true the add included a carry-in, so the carry-out test
//              also has to consider the incoming carry flag.
// Writes CF and VF here, then calls SetZnFlags for the remaining flags.
private static void SetIaddFlags(EmitterContext context, Operand res, Operand srcA, Operand srcB, bool setCC, bool extended)
{
if (!setCC)
{
return;
}
if (extended)
{
// C = (d == a && CIn) || d < a
Operand tempC0 = context.ICompareEqual(res, srcA);
Operand tempC1 = context.ICompareLessUnsigned(res, srcA);
// The incoming carry is read here, before CF is overwritten on the next line.
tempC0 = context.BitwiseAnd(tempC0, GetCF());
context.Copy(GetCF(), context.BitwiseOr(tempC0, tempC1));
}
else
{
// C = d < a
context.Copy(GetCF(), context.ICompareLessUnsigned(res, srcA));
}
// V = (d ^ a) & ~(a ^ b) < 0
// i.e. the operands had the same sign and the result's sign differs from it.
Operand tempV0 = context.BitwiseExclusiveOr(res, srcA);
Operand tempV1 = context.BitwiseExclusiveOr(srcA, srcB);
tempV1 = context.BitwiseNot(tempV1);
Operand tempV = context.BitwiseAnd(tempV0, tempV1);
context.Copy(GetVF(), context.ICompareLess(tempV, Const(0)));
SetZnFlags(context, res, setCC: true, extended: extended);
}
}
}

View file

@ -0,0 +1,327 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits CSETP. The condition-code test itself is not implemented: it is treated
/// as always true, then combined with the source predicate and written to the
/// two destination predicates.
/// </summary>
public static void Csetp(EmitterContext context)
{
    InstCsetp op = context.GetOp<InstCsetp>();

    // TODO: Implement that properly.

    Operand testResult = Const(IrConsts.True);
    Operand testResultInv = context.BitwiseNot(testResult);

    Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);

    Operand p0 = GetPredLogicalOp(context, op.Bop, testResult, srcPred);
    Operand p1 = GetPredLogicalOp(context, op.Bop, testResultInv, srcPred);

    context.Copy(Register(op.DestPred, RegisterType.Predicate), p0);
    context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1);
}
/// <summary>
/// Emits ICMP for the encoding whose three sources are all registers.
/// </summary>
public static void IcmpR(EmitterContext context)
{
    InstIcmpR op = context.GetOp<InstIcmpR>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand regB = GetSrcReg(context, op.SrcB);
    Operand regC = GetSrcReg(context, op.SrcC);

    EmitIcmp(context, op.IComp, regA, regB, regC, op.Dest, op.Signed);
}
/// <summary>
/// Emits ICMP for the encoding whose second source is the 20-bit immediate field.
/// </summary>
public static void IcmpI(EmitterContext context)
{
    InstIcmpI op = context.GetOp<InstIcmpI>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand immB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
    Operand regC = GetSrcReg(context, op.SrcC);

    EmitIcmp(context, op.IComp, regA, immB, regC, op.Dest, op.Signed);
}
/// <summary>
/// Emits ICMP for the encoding whose second source is read through
/// <c>GetSrcCbuf</c>; the first and third sources are registers.
/// </summary>
public static void IcmpC(EmitterContext context)
{
    InstIcmpC op = context.GetOp<InstIcmpC>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand cbufB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand regC = GetSrcReg(context, op.SrcC);

    EmitIcmp(context, op.IComp, regA, cbufB, regC, op.Dest, op.Signed);
}
/// <summary>
/// Emits ICMP for the encoding where the second source is the register in the
/// <c>SrcC</c> field and the third source is read through <c>GetSrcCbuf</c>.
/// </summary>
public static void IcmpRc(EmitterContext context)
{
    InstIcmpRc op = context.GetOp<InstIcmpRc>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand regB = GetSrcReg(context, op.SrcC);
    Operand cbufC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitIcmp(context, op.IComp, regA, regB, cbufC, op.Dest, op.Signed);
}
/// <summary>
/// Emits ISET for the encoding whose second source is a register.
/// </summary>
public static void IsetR(EmitterContext context)
{
    InstIsetR op = context.GetOp<InstIsetR>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand regB = GetSrcReg(context, op.SrcB);

    EmitIset(
        context,
        op.IComp,
        op.Bop,
        regA,
        regB,
        op.SrcPred,
        op.SrcPredInv,
        op.Dest,
        op.BVal,
        op.Signed,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits ISET for the encoding whose second source is the 20-bit immediate field.
/// </summary>
public static void IsetI(EmitterContext context)
{
    InstIsetI op = context.GetOp<InstIsetI>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand immB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitIset(
        context,
        op.IComp,
        op.Bop,
        regA,
        immB,
        op.SrcPred,
        op.SrcPredInv,
        op.Dest,
        op.BVal,
        op.Signed,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits ISET for the encoding whose second source is read through <c>GetSrcCbuf</c>.
/// </summary>
public static void IsetC(EmitterContext context)
{
    InstIsetC op = context.GetOp<InstIsetC>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand cbufB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitIset(
        context,
        op.IComp,
        op.Bop,
        regA,
        cbufB,
        op.SrcPred,
        op.SrcPredInv,
        op.Dest,
        op.BVal,
        op.Signed,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits ISETP for the encoding whose second source is a register.
/// </summary>
public static void IsetpR(EmitterContext context)
{
    InstIsetpR op = context.GetOp<InstIsetpR>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand regB = GetSrcReg(context, op.SrcB);

    EmitIsetp(
        context,
        op.IComp,
        op.Bop,
        regA,
        regB,
        op.SrcPred,
        op.SrcPredInv,
        op.DestPred,
        op.DestPredInv,
        op.Signed,
        op.X);
}
/// <summary>
/// Emits ISETP for the encoding whose second source is the 20-bit immediate field.
/// </summary>
public static void IsetpI(EmitterContext context)
{
    InstIsetpI op = context.GetOp<InstIsetpI>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand immB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitIsetp(
        context,
        op.IComp,
        op.Bop,
        regA,
        immB,
        op.SrcPred,
        op.SrcPredInv,
        op.DestPred,
        op.DestPredInv,
        op.Signed,
        op.X);
}
/// <summary>
/// Emits ISETP for the encoding whose second source is read through <c>GetSrcCbuf</c>.
/// </summary>
public static void IsetpC(EmitterContext context)
{
    InstIsetpC op = context.GetOp<InstIsetpC>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand cbufB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitIsetp(
        context,
        op.IComp,
        op.Bop,
        regA,
        cbufB,
        op.SrcPred,
        op.SrcPredInv,
        op.DestPred,
        op.DestPredInv,
        op.Signed,
        op.X);
}
/// <summary>
/// Shared emitter for ICMP: compares C against zero with <paramref name="cmpOp"/>
/// and writes A to register <paramref name="rd"/> when the comparison holds,
/// otherwise B.
/// </summary>
private static void EmitIcmp(
    EmitterContext context,
    IComp cmpOp,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    bool isSigned)
{
    Operand condition = GetIntComparison(context, cmpOp, srcC, Const(0), isSigned);

    Operand selected = context.ConditionalSelect(condition, srcA, srcB);

    context.Copy(GetDest(rd), selected);
}
/// <summary>
/// Shared emitter for ISET: compares A with B, combines the outcome with the
/// source predicate using <paramref name="logicOp"/>, and writes it to register
/// <paramref name="rd"/> either as-is or, when <paramref name="boolFloat"/> is
/// set, as the float 1.0 / integer 0 pair.
/// </summary>
private static void EmitIset(
    EmitterContext context,
    IComp cmpOp,
    BoolOp logicOp,
    Operand srcA,
    Operand srcB,
    int srcPred,
    bool srcPredInv,
    int rd,
    bool boolFloat,
    bool isSigned,
    bool extended,
    bool writeCC)
{
    Operand cmp = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
    Operand pred = GetPredicate(context, srcPred, srcPredInv);

    Operand res = GetPredLogicalOp(context, logicOp, cmp, pred);

    Operand dest = GetDest(rd);

    if (!boolFloat)
    {
        context.Copy(dest, res);

        SetZnFlags(context, res, writeCC, extended);

        return;
    }

    res = context.ConditionalSelect(res, ConstF(1), Const(0));

    context.Copy(dest, res);

    SetFPZnFlags(context, res, writeCC);
}
/// <summary>
/// Shared emitter for ISETP: compares A with B, then writes the comparison and
/// its bitwise inverse, each combined with the source predicate through
/// <paramref name="logicOp"/>, to the two destination predicates.
/// </summary>
private static void EmitIsetp(
    EmitterContext context,
    IComp cmpOp,
    BoolOp logicOp,
    Operand srcA,
    Operand srcB,
    int srcPred,
    bool srcPredInv,
    int destPred,
    int destPredInv,
    bool isSigned,
    bool extended)
{
    Operand cmp = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
    Operand cmpInv = context.BitwiseNot(cmp);

    Operand pred = GetPredicate(context, srcPred, srcPredInv);

    Operand first = GetPredLogicalOp(context, logicOp, cmp, pred);
    Operand second = GetPredLogicalOp(context, logicOp, cmpInv, pred);

    context.Copy(Register(destPred, RegisterType.Predicate), first);
    context.Copy(Register(destPredInv, RegisterType.Predicate), second);
}
/// <summary>
/// Dispatches to the carry-aware comparison when <paramref name="extended"/>
/// is set, and to the plain comparison otherwise.
/// </summary>
private static Operand GetIntComparison(
    EmitterContext context,
    IComp cond,
    Operand srcA,
    Operand srcB,
    bool isSigned,
    bool extended)
{
    if (extended)
    {
        return GetIntComparisonExtended(context, cond, srcA, srcB, isSigned);
    }

    return GetIntComparison(context, cond, srcA, srcB, isSigned);
}
// Builds the comparison used when the "extended" (X) modifier is set.
// Each case combines a direct comparison of srcA/srcB with the current ZF/CF
// flags. Going by the per-case comments (xh/xl, yh/yl), srcA/srcB are presumably
// the high words of a wider value and the flags carry the outcome for the low
// words from an earlier instruction - TODO confirm against the caller.
// IComp.T / IComp.F yield constants; any other unknown condition throws
// ArgumentException.
private static Operand GetIntComparisonExtended(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
{
Operand res;
if (cond == IComp.T)
{
res = Const(IrConsts.True);
}
else if (cond == IComp.F)
{
res = Const(IrConsts.False);
}
else
{
// NOTE(review): every case of the switch below (or its throw) replaces res,
// so the value produced by these two operations is never read.
res = context.ISubtract(srcA, srcB);
res = context.IAdd(res, context.BitwiseNot(GetCF()));
switch (cond)
{
case IComp.Eq: // r = xh == yh && xl == yl
res = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetZF());
break;
case IComp.Lt: // r = xh < yh || (xh == yh && xl < yl)
Operand notC = context.BitwiseNot(GetCF());
Operand prevLt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notC);
res = isSigned
? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLt)
: context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLt);
break;
case IComp.Le: // r = xh < yh || (xh == yh && xl <= yl)
Operand zOrNotC = context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF()));
Operand prevLe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), zOrNotC);
res = isSigned
? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLe)
: context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLe);
break;
case IComp.Gt: // r = xh > yh || (xh == yh && xl > yl)
Operand notZAndC = context.BitwiseAnd(context.BitwiseNot(GetZF()), GetCF());
Operand prevGt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notZAndC);
res = isSigned
? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGt)
: context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGt);
break;
case IComp.Ge: // r = xh > yh || (xh == yh && xl >= yl)
Operand prevGe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetCF());
res = isSigned
? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGe)
: context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGe);
break;
case IComp.Ne: // r = xh != yh || xl != yl
res = context.BitwiseOr(context.ICompareNotEqual(srcA, srcB), context.BitwiseNot(GetZF()));
break;
default:
throw new ArgumentException($"Unexpected condition \"{cond}\".");
}
}
return res;
}
/// <summary>
/// Emits a single integer comparison of A and B and returns its result.
/// <c>IComp.T</c> and <c>IComp.F</c> yield constants without emitting anything.
/// </summary>
/// <exception cref="InvalidOperationException">The condition is not a known <see cref="IComp"/> value.</exception>
private static Operand GetIntComparison(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
{
    if (cond == IComp.T)
    {
        return Const(IrConsts.True);
    }

    if (cond == IComp.F)
    {
        return Const(IrConsts.False);
    }

    // Start from the unsigned form; equality tests have no signed variant.
    Instruction compare = cond switch
    {
        IComp.Lt => Instruction.CompareLessU32,
        IComp.Eq => Instruction.CompareEqual,
        IComp.Le => Instruction.CompareLessOrEqualU32,
        IComp.Gt => Instruction.CompareGreaterU32,
        IComp.Ne => Instruction.CompareNotEqual,
        IComp.Ge => Instruction.CompareGreaterOrEqualU32,
        _ => throw new InvalidOperationException($"Unexpected condition \"{cond}\".")
    };

    if (isSigned)
    {
        compare = cond switch
        {
            IComp.Lt => Instruction.CompareLess,
            IComp.Le => Instruction.CompareLessOrEqual,
            IComp.Gt => Instruction.CompareGreater,
            IComp.Ge => Instruction.CompareGreaterOrEqual,
            _ => compare
        };
    }

    return context.Add(compare, Local(), srcA, srcB);
}
}
}

View file

@ -0,0 +1,167 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
private const int PT = RegisterConsts.PredicateTrueIndex;
/// <summary>
/// Emits LOP for the encoding whose second source is a register.
/// </summary>
public static void LopR(EmitterContext context)
{
    InstLopR op = context.GetOp<InstLopR>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand regB = GetSrcReg(context, op.SrcB);

    EmitLop(
        context,
        op.Lop,
        op.PredicateOp,
        regA,
        regB,
        op.Dest,
        op.DestPred,
        op.NegA,
        op.NegB,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits LOP for the encoding whose second source is the 20-bit immediate field.
/// </summary>
public static void LopI(EmitterContext context)
{
    InstLopI op = context.GetOp<InstLopI>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand immB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitLop(
        context,
        op.LogicOp,
        op.PredicateOp,
        regA,
        immB,
        op.Dest,
        op.DestPred,
        op.NegA,
        op.NegB,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits LOP for the encoding whose second source is read through <c>GetSrcCbuf</c>.
/// </summary>
public static void LopC(EmitterContext context)
{
    InstLopC op = context.GetOp<InstLopC>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand cbufB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitLop(
        context,
        op.LogicOp,
        op.PredicateOp,
        regA,
        cbufB,
        op.Dest,
        op.DestPred,
        op.NegA,
        op.NegB,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits LOP for the encoding that carries a 32-bit immediate. This form passes
/// <c>PredicateOp.F</c> and <c>PT</c> as the predicate operation and destination
/// predicate.
/// </summary>
public static void Lop32i(EmitterContext context)
{
    InstLop32i op = context.GetOp<InstLop32i>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand immB = GetSrcImm(context, op.Imm32);

    EmitLop(
        context,
        op.LogicOp,
        PredicateOp.F,
        regA,
        immB,
        op.Dest,
        PT,
        op.NegA,
        op.NegB,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits LOP3 for the encoding whose three sources are all registers.
/// </summary>
public static void Lop3R(EmitterContext context)
{
    InstLop3R op = context.GetOp<InstLop3R>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand regB = GetSrcReg(context, op.SrcB);
    Operand regC = GetSrcReg(context, op.SrcC);

    EmitLop3(
        context,
        op.Imm,
        op.PredicateOp,
        regA,
        regB,
        regC,
        op.Dest,
        op.DestPred,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits LOP3 for the encoding whose second source is the 20-bit immediate field.
/// This form passes <c>PredicateOp.F</c>, <c>PT</c> and a false extended flag.
/// </summary>
public static void Lop3I(EmitterContext context)
{
    InstLop3I op = context.GetOp<InstLop3I>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand immB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
    Operand regC = GetSrcReg(context, op.SrcC);

    EmitLop3(
        context,
        op.Imm,
        PredicateOp.F,
        regA,
        immB,
        regC,
        op.Dest,
        PT,
        false,
        op.WriteCC);
}
/// <summary>
/// Emits LOP3 for the encoding whose second source is read through <c>GetSrcCbuf</c>.
/// This form passes <c>PredicateOp.F</c>, <c>PT</c> and a false extended flag.
/// </summary>
public static void Lop3C(EmitterContext context)
{
    InstLop3C op = context.GetOp<InstLop3C>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand cbufB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand regC = GetSrcReg(context, op.SrcC);

    EmitLop3(
        context,
        op.Imm,
        PredicateOp.F,
        regA,
        cbufB,
        regC,
        op.Dest,
        PT,
        false,
        op.WriteCC);
}
/// <summary>
/// Shared emitter for LOP: optionally inverts each source, applies the selected
/// bitwise operation, writes the optional predicate result, the destination
/// register and the Z/N flags.
/// </summary>
/// <param name="context">Emitter the operations are appended to.</param>
/// <param name="logicOp">Bitwise operation; any value other than And/Or/Xor passes B through.</param>
/// <param name="predOp">How the predicate result is derived from the value.</param>
/// <param name="srcA">First source.</param>
/// <param name="srcB">Second source.</param>
/// <param name="rd">Destination register index.</param>
/// <param name="destPred">Destination predicate index.</param>
/// <param name="invertA">Bitwise-invert A before the operation.</param>
/// <param name="invertB">Bitwise-invert B before the operation.</param>
/// <param name="extended">Forwarded to the flag update.</param>
/// <param name="writeCC">Update condition flags.</param>
private static void EmitLop(
    EmitterContext context,
    LogicOp logicOp,
    PredicateOp predOp,
    Operand srcA,
    Operand srcB,
    int rd,
    int destPred,
    bool invertA,
    bool invertB,
    bool extended,
    bool writeCC)
{
    srcA = context.BitwiseNot(srcA, invertA);
    srcB = context.BitwiseNot(srcB, invertB);

    // Each arm simply yields the value; the arms used to assign to "res"
    // inside its own initializer, which was redundant.
    Operand res = logicOp switch
    {
        LogicOp.And => context.BitwiseAnd(srcA, srcB),
        LogicOp.Or => context.BitwiseOr(srcA, srcB),
        LogicOp.Xor => context.BitwiseExclusiveOr(srcA, srcB),
        _ => srcB
    };

    EmitLopPredWrite(context, res, predOp, destPred);

    context.Copy(GetDest(rd), res);

    SetZnFlags(context, res, writeCC, extended);
}
/// <summary>
/// Shared emitter for LOP3: builds the three-input expression for
/// <paramref name="truthTable"/> via <c>Lop3Expression.GetFromTruthTable</c>,
/// then writes the optional predicate result, the destination register and
/// the Z/N flags.
/// </summary>
private static void EmitLop3(
    EmitterContext context,
    int truthTable,
    PredicateOp predOp,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    int destPred,
    bool extended,
    bool writeCC)
{
    Operand value = Lop3Expression.GetFromTruthTable(context, srcA, srcB, srcC, truthTable);

    EmitLopPredWrite(context, value, predOp, destPred);

    context.Copy(GetDest(rd), value);

    SetZnFlags(context, value, writeCC, extended);
}
/// <summary>
/// Writes the predicate result of a logic operation. Nothing is emitted when
/// <paramref name="pred"/> is the always-true predicate index.
/// </summary>
private static void EmitLopPredWrite(EmitterContext context, Operand result, PredicateOp predOp, int pred)
{
    if (pred == RegisterConsts.PredicateTrueIndex)
    {
        return;
    }

    Operand predValue = predOp switch
    {
        PredicateOp.F => Const(IrConsts.False),
        PredicateOp.T => Const(IrConsts.True),
        PredicateOp.Z => context.ICompareEqual(result, Const(0)),
        // Any remaining value is handled as "not zero".
        _ => context.ICompareNotEqual(result, Const(0))
    };

    context.Copy(Register(pred, RegisterType.Predicate), predValue);
}
}
}

View file

@ -0,0 +1,71 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits IMNMX for the encoding whose second source is a register.
/// </summary>
public static void ImnmxR(EmitterContext context)
{
    InstImnmxR op = context.GetOp<InstImnmxR>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand regB = GetSrcReg(context, op.SrcB);
    Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

    EmitImnmx(context, regA, regB, selector, op.Dest, op.Signed, op.WriteCC);
}
/// <summary>
/// Emits IMNMX for the encoding whose second source is the 20-bit immediate field.
/// </summary>
public static void ImnmxI(EmitterContext context)
{
    InstImnmxI op = context.GetOp<InstImnmxI>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand immB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
    Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

    EmitImnmx(context, regA, immB, selector, op.Dest, op.Signed, op.WriteCC);
}
/// <summary>
/// Emits IMNMX for the encoding whose second source is read through <c>GetSrcCbuf</c>.
/// </summary>
public static void ImnmxC(EmitterContext context)
{
    InstImnmxC op = context.GetOp<InstImnmxC>();

    Operand regA = GetSrcReg(context, op.SrcA);
    Operand cbufB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

    EmitImnmx(context, regA, cbufB, selector, op.Dest, op.Signed, op.WriteCC);
}
/// <summary>
/// Shared emitter for IMNMX: computes both the minimum and the maximum of A and
/// B (signed or unsigned), then selects the minimum when
/// <paramref name="srcPred"/> is true and the maximum otherwise.
/// </summary>
private static void EmitImnmx(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    Operand srcPred,
    int rd,
    bool isSignedInt,
    bool writeCC)
{
    Operand minimum;
    Operand maximum;

    if (isSignedInt)
    {
        minimum = context.IMinimumS32(srcA, srcB);
        maximum = context.IMaximumS32(srcA, srcB);
    }
    else
    {
        minimum = context.IMinimumU32(srcA, srcB);
        maximum = context.IMaximumU32(srcA, srcB);
    }

    Operand chosen = context.ConditionalSelect(srcPred, minimum, maximum);

    context.Copy(GetDest(rd), chosen);

    SetZnFlags(context, chosen, writeCC);

    // TODO: X flags.
}
}
}

View file

@ -15,369 +15,160 @@ namespace Ryujinx.Graphics.Shader.Instructions
Shared
}
/// <summary>
/// Emits AL2P: writes Ra plus the instruction's immediate to Rd.
/// Nothing is emitted when Rd is the zero register.
/// </summary>
public static void Al2p(EmitterContext context)
{
    OpCodeAl2p op = (OpCodeAl2p)context.CurrOp;

    if (!op.Rd.IsRZ)
    {
        Operand dest = Register(op.Rd);
        Operand sum = context.IAdd(Register(op.Ra), Const(op.Immediate));

        context.Copy(dest, sum);
    }
}
// Emits ALD: loads op.Count consecutive attribute words into the registers
// starting at Rd. The loop stops as soon as the destination register is RZ.
public static void Ald(EmitterContext context)
{
OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;
// Copy of source C, passed as the last argument of LoadAttribute below.
Operand primVertex = context.Copy(GetSrcC(context));
for (int index = 0; index < op.Count; index++)
{
Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
if (rd.IsRZ)
{
break;
}
if (op.Phys)
{
// Indexed access: the index is (srcA - UserAttributeBase) / 4 and the
// load is made relative to UserAttributeBase.
Operand userAttrOffset = context.ISubtract(GetSrcA(context), Const(AttributeConsts.UserAttributeBase));
Operand userAttrIndex = context.ShiftRightU32(userAttrOffset, Const(2));
context.Copy(Register(rd), context.LoadAttribute(Const(AttributeConsts.UserAttributeBase), userAttrIndex, primVertex));
context.Config.SetUsedFeature(FeatureFlags.IaIndexing);
}
else if (op.Rc.IsRZ)
{
// No Rc register: read the attribute operand directly; each word is 4 bytes apart.
Operand src = Attribute(op.AttributeOffset + index * 4);
context.FlagAttributeRead(src.Value);
context.Copy(Register(rd), src);
}
else
{
// Rc is a real register: go through LoadAttribute with primVertex.
Operand src = Const(op.AttributeOffset + index * 4);
context.FlagAttributeRead(src.Value);
context.Copy(Register(rd), context.LoadAttribute(src, Const(0), primVertex));
}
}
}
// Emits AST: stores op.Count consecutive registers, starting at Rd, into
// attribute words.
public static void Ast(EmitterContext context)
{
OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;
for (int index = 0; index < op.Count; index++)
{
// Stop once the source register index runs past the zero-register index.
if (op.Rd.Index + index > RegisterConsts.RegisterZeroIndex)
{
break;
}
Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
if (op.Phys)
{
// Indexed access: the index is (srcA - UserAttributeBase) / 4 and the
// store is made relative to UserAttributeBase.
Operand userAttrOffset = context.ISubtract(GetSrcA(context), Const(AttributeConsts.UserAttributeBase));
Operand userAttrIndex = context.ShiftRightU32(userAttrOffset, Const(2));
context.StoreAttribute(Const(AttributeConsts.UserAttributeBase), userAttrIndex, Register(rd));
context.Config.SetUsedFeature(FeatureFlags.OaIndexing);
}
else
{
// Direct access: each word is 4 bytes apart from the base attribute offset.
Operand dest = Attribute(op.AttributeOffset + index * 4);
context.FlagAttributeWritten(dest.Value);
context.Copy(dest, Register(rd));
}
}
}
public static void Atom(EmitterContext context)
{
OpCodeAtom op = (OpCodeAtom)context.CurrOp;
InstAtom op = context.GetOp<InstAtom>();
ReductionType type = (ReductionType)op.RawOpCode.Extract(49, 2);
int sOffset = (op.Imm20 << 12) >> 12;
int sOffset = (op.RawOpCode.Extract(28, 20) << 12) >> 12;
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, sOffset);
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, sOffset);
Operand value = GetSrcReg(context, op.SrcB);
Operand value = GetSrcB(context);
Operand res = EmitAtomicOp(context, Instruction.MrGlobal, op.Op, op.Size, addrLow, addrHigh, value);
Operand res = EmitAtomicOp(
context,
Instruction.MrGlobal,
op.AtomicOp,
type,
addrLow,
addrHigh,
value);
context.Copy(GetDest(context), res);
context.Copy(GetDest(op.Dest), res);
}
public static void Atoms(EmitterContext context)
{
OpCodeAtom op = (OpCodeAtom)context.CurrOp;
InstAtoms op = context.GetOp<InstAtoms>();
ReductionType type = op.RawOpCode.Extract(28, 2) switch
{
0 => ReductionType.U32,
1 => ReductionType.S32,
2 => ReductionType.U64,
_ => ReductionType.S64
};
Operand offset = context.ShiftRightU32(GetSrcReg(context, op.SrcA), Const(2));
Operand offset = context.ShiftRightU32(GetSrcA(context), Const(2));
int sOffset = (op.RawOpCode.Extract(30, 22) << 10) >> 10;
int sOffset = (op.Imm22 << 10) >> 10;
offset = context.IAdd(offset, Const(sOffset));
Operand value = GetSrcB(context);
Operand value = GetSrcReg(context, op.SrcB);
Operand res = EmitAtomicOp(
context,
Instruction.MrShared,
op.AtomicOp,
type,
offset,
Const(0),
value);
context.Copy(GetDest(context), res);
}
public static void Bar(EmitterContext context)
{
OpCodeBarrier op = (OpCodeBarrier)context.CurrOp;
// TODO: Support other modes.
if (op.Mode == BarrierMode.Sync)
AtomSize size = op.AtomsSize switch
{
context.Barrier();
}
else
{
context.Config.GpuAccessor.Log($"Invalid barrier mode: {op.Mode}.");
}
}
AtomsSize.S32 => AtomSize.S32,
AtomsSize.U64 => AtomSize.U64,
AtomsSize.S64 => AtomSize.S64,
_ => AtomSize.U32
};
public static void Ipa(EmitterContext context)
{
OpCodeIpa op = (OpCodeIpa)context.CurrOp;
Operand res = EmitAtomicOp(context, Instruction.MrShared, op.AtomOp, size, offset, Const(0), value);
context.FlagAttributeRead(op.AttributeOffset);
Operand res;
if (op.Idx)
{
Operand userAttrOffset = context.ISubtract(GetSrcA(context), Const(AttributeConsts.UserAttributeBase));
Operand userAttrIndex = context.ShiftRightU32(userAttrOffset, Const(2));
res = context.LoadAttribute(Const(AttributeConsts.UserAttributeBase), userAttrIndex, Const(0));
res = context.FPMultiply(res, Attribute(AttributeConsts.PositionW));
context.Config.SetUsedFeature(FeatureFlags.IaIndexing);
}
else
{
res = Attribute(op.AttributeOffset);
if (op.AttributeOffset >= AttributeConsts.UserAttributeBase &&
op.AttributeOffset < AttributeConsts.UserAttributeEnd)
{
int index = (op.AttributeOffset - AttributeConsts.UserAttributeBase) >> 4;
if (context.Config.ImapTypes[index].GetFirstUsedType() == PixelImap.Perspective)
{
res = context.FPMultiply(res, Attribute(AttributeConsts.PositionW));
}
}
}
if (op.Mode == InterpolationMode.Default)
{
Operand srcB = GetSrcB(context);
res = context.FPMultiply(res, srcB);
}
res = context.FPSaturate(res, op.Saturate);
context.Copy(GetDest(context), res);
}
// Emits ISBERD as a plain copy of source A into the destination register.
public static void Isberd(EmitterContext context)
{
// This instruction performs a load from ISBE memory,
// however it seems to be only used to get some vertex
// input data, so we instead propagate the offset so that
// it can be used on the attribute load.
context.Copy(GetDest(context), GetSrcA(context));
}
public static void Ld(EmitterContext context)
{
EmitLoad(context, MemoryRegion.Local);
context.Copy(GetDest(op.Dest), res);
}
public static void Ldc(EmitterContext context)
{
OpCodeLdc op = (OpCodeLdc)context.CurrOp;
InstLdc op = context.GetOp<InstLdc>();
if (op.Size > IntegerSize.B64)
if (op.LsSize > LsSize2.B64)
{
context.Config.GpuAccessor.Log($"Invalid LDC size: {op.Size}.");
context.Config.GpuAccessor.Log($"Invalid LDC size: {op.LsSize}.");
return;
}
bool isSmallInt = op.Size < IntegerSize.B32;
bool isSmallInt = op.LsSize < LsSize2.B32;
int count = op.Size == IntegerSize.B64 ? 2 : 1;
int count = op.LsSize == LsSize2.B64 ? 2 : 1;
Operand slot = Const(op.Slot);
Operand srcA = GetSrcA(context);
Operand slot = Const(op.CbufSlot);
Operand srcA = GetSrcReg(context, op.SrcA);
if (op.IndexMode == CbIndexMode.Is ||
op.IndexMode == CbIndexMode.Isl)
if (op.AddressMode == AddressMode.Is || op.AddressMode == AddressMode.Isl)
{
slot = context.IAdd(slot, context.BitfieldExtractU32(srcA, Const(16), Const(16)));
srcA = context.BitwiseAnd(srcA, Const(0xffff));
}
Operand addr = context.IAdd(srcA, Const(op.Offset));
Operand addr = context.IAdd(srcA, Const(Imm16ToSInt(op.CbufOffset)));
Operand wordOffset = context.ShiftRightU32(addr, Const(2));
Operand bitOffset = GetBitOffset(context, addr);
for (int index = 0; index < count; index++)
{
Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
Register dest = new Register(op.Dest + index, RegisterType.Gpr);
if (rd.IsRZ)
if (dest.IsRZ)
{
break;
}
Operand offset = context.IAdd(wordOffset, Const(index));
Operand value = context.LoadConstant(slot, offset);
if (isSmallInt)
{
value = ExtractSmallInt(context, op.Size, bitOffset, value);
value = ExtractSmallInt(context, (LsSize)op.LsSize, bitOffset, value);
}
context.Copy(Register(rd), value);
context.Copy(Register(dest), value);
}
}
public static void Ldg(EmitterContext context)
{
EmitLoadGlobal(context);
InstLdg op = context.GetOp<InstLdg>();
EmitLdg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
}
/// <summary>
/// Emits LDL by forwarding to the shared load emitter with
/// <c>MemoryRegion.Local</c>.
/// </summary>
public static void Ldl(EmitterContext context)
{
    InstLdl op = context.GetOp<InstLdl>();

    Operand srcA = GetSrcReg(context, op.SrcA);
    var offset = Imm24ToSInt(op.Imm24);

    EmitLoad(context, MemoryRegion.Local, op.LsSize, srcA, op.Dest, offset);
}
public static void Lds(EmitterContext context)
{
EmitLoad(context, MemoryRegion.Shared);
}
InstLds op = context.GetOp<InstLds>();
public static void Membar(EmitterContext context)
{
OpCodeMemoryBarrier op = (OpCodeMemoryBarrier)context.CurrOp;
if (op.Level == BarrierLevel.Cta)
{
context.GroupMemoryBarrier();
}
else
{
context.MemoryBarrier();
}
}
public static void Out(EmitterContext context)
{
OpCode op = context.CurrOp;
bool emit = op.RawOpCode.Extract(39);
bool cut = op.RawOpCode.Extract(40);
if (!(emit || cut))
{
context.Config.GpuAccessor.Log("Invalid OUT encoding.");
}
if (emit)
{
context.EmitVertex();
}
if (cut)
{
context.EndPrimitive();
}
EmitLoad(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
}
public static void Red(EmitterContext context)
{
OpCodeRed op = (OpCodeRed)context.CurrOp;
InstRed op = context.GetOp<InstRed>();
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, op.Imm20);
EmitAtomicOp(
context,
Instruction.MrGlobal,
op.AtomicOp,
op.Type,
addrLow,
addrHigh,
GetDest(context));
}
public static void St(EmitterContext context)
{
EmitStore(context, MemoryRegion.Local);
EmitAtomicOp(context, Instruction.MrGlobal, (AtomOp)op.RedOp, op.RedSize, addrLow, addrHigh, GetDest(op.SrcB));
}
public static void Stg(EmitterContext context)
{
EmitStoreGlobal(context);
InstStg op = context.GetOp<InstStg>();
EmitStg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
}
// STL: decodes the current op as InstStl and emits a store to the local memory region.
public static void Stl(EmitterContext context)
{
    var op = context.GetOp<InstStl>();

    // Base address comes from the SrcA register; the 24-bit immediate is converted by Imm24ToSInt.
    Operand baseAddress = GetSrcReg(context, op.SrcA);
    int immOffset = Imm24ToSInt(op.Imm24);

    EmitStore(context, MemoryRegion.Local, op.LsSize, baseAddress, op.Dest, immOffset);
}
public static void Sts(EmitterContext context)
{
EmitStore(context, MemoryRegion.Shared);
InstSts op = context.GetOp<InstSts>();
EmitStore(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
}
private static Operand EmitAtomicOp(
EmitterContext context,
Instruction mr,
AtomicOp op,
ReductionType type,
Operand addrLow,
Operand addrHigh,
Operand value)
Instruction mr,
AtomOp op,
AtomSize type,
Operand addrLow,
Operand addrHigh,
Operand value)
{
Operand res = Const(0);
switch (op)
{
case AtomicOp.Add:
if (type == ReductionType.S32 || type == ReductionType.U32)
case AtomOp.Add:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicAdd(mr, addrLow, addrHigh, value);
}
@ -386,8 +177,8 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
}
break;
case AtomicOp.BitwiseAnd:
if (type == ReductionType.S32 || type == ReductionType.U32)
case AtomOp.And:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicAnd(mr, addrLow, addrHigh, value);
}
@ -396,8 +187,8 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
}
break;
case AtomicOp.BitwiseExclusiveOr:
if (type == ReductionType.S32 || type == ReductionType.U32)
case AtomOp.Xor:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicXor(mr, addrLow, addrHigh, value);
}
@ -406,8 +197,8 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
}
break;
case AtomicOp.BitwiseOr:
if (type == ReductionType.S32 || type == ReductionType.U32)
case AtomOp.Or:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicOr(mr, addrLow, addrHigh, value);
}
@ -416,12 +207,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
}
break;
case AtomicOp.Maximum:
if (type == ReductionType.S32)
case AtomOp.Max:
if (type == AtomSize.S32)
{
res = context.AtomicMaxS32(mr, addrLow, addrHigh, value);
}
else if (type == ReductionType.U32)
else if (type == AtomSize.U32)
{
res = context.AtomicMaxU32(mr, addrLow, addrHigh, value);
}
@ -430,12 +221,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
}
break;
case AtomicOp.Minimum:
if (type == ReductionType.S32)
case AtomOp.Min:
if (type == AtomSize.S32)
{
res = context.AtomicMinS32(mr, addrLow, addrHigh, value);
}
else if (type == ReductionType.U32)
else if (type == AtomSize.U32)
{
res = context.AtomicMinU32(mr, addrLow, addrHigh, value);
}
@ -449,77 +240,82 @@ namespace Ryujinx.Graphics.Shader.Instructions
return res;
}
private static void EmitLoad(EmitterContext context, MemoryRegion region)
private static void EmitLoad(
EmitterContext context,
MemoryRegion region,
LsSize2 size,
Operand srcA,
int rd,
int offset)
{
OpCodeMemory op = (OpCodeMemory)context.CurrOp;
if (op.Size > IntegerSize.B128)
if (size > LsSize2.B128)
{
context.Config.GpuAccessor.Log($"Invalid load size: {op.Size}.");
context.Config.GpuAccessor.Log($"Invalid load size: {size}.");
return;
}
bool isSmallInt = op.Size < IntegerSize.B32;
bool isSmallInt = size < LsSize2.B32;
int count = 1;
switch (op.Size)
switch (size)
{
case IntegerSize.B64: count = 2; break;
case IntegerSize.B128: count = 4; break;
case LsSize2.B64: count = 2; break;
case LsSize2.B128: count = 4; break;
}
Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));
// Word offset = byte offset / 4 (one word = 4 bytes).
Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
Operand baseOffset = context.IAdd(srcA, Const(offset));
Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes).
Operand bitOffset = GetBitOffset(context, baseOffset);
for (int index = 0; index < count; index++)
{
Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
Register dest = new Register(rd + index, RegisterType.Gpr);
if (rd.IsRZ)
if (dest.IsRZ)
{
break;
}
Operand offset = context.IAdd(wordOffset, Const(index));
Operand elemOffset = context.IAdd(wordOffset, Const(index));
Operand value = null;
switch (region)
{
case MemoryRegion.Local: value = context.LoadLocal (offset); break;
case MemoryRegion.Shared: value = context.LoadShared(offset); break;
case MemoryRegion.Local: value = context.LoadLocal(elemOffset); break;
case MemoryRegion.Shared: value = context.LoadShared(elemOffset); break;
}
if (isSmallInt)
{
value = ExtractSmallInt(context, op.Size, bitOffset, value);
value = ExtractSmallInt(context, (LsSize)size, bitOffset, value);
}
context.Copy(Register(rd), value);
context.Copy(Register(dest), value);
}
}
private static void EmitLoadGlobal(EmitterContext context)
private static void EmitLdg(
EmitterContext context,
LsSize size,
int ra,
int rd,
int offset,
bool extended)
{
OpCodeMemory op = (OpCodeMemory)context.CurrOp;
bool isSmallInt = size < LsSize.B32;
bool isSmallInt = op.Size < IntegerSize.B32;
int count = GetVectorCount(size);
int count = GetVectorCount(op.Size);
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
Operand bitOffset = GetBitOffset(context, addrLow);
for (int index = 0; index < count; index++)
{
Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
Register dest = new Register(rd + index, RegisterType.Gpr);
if (rd.IsRZ)
if (dest.IsRZ)
{
break;
}
@ -528,47 +324,47 @@ namespace Ryujinx.Graphics.Shader.Instructions
if (isSmallInt)
{
value = ExtractSmallInt(context, op.Size, bitOffset, value);
value = ExtractSmallInt(context, size, bitOffset, value);
}
context.Copy(Register(rd), value);
context.Copy(Register(dest), value);
}
}
private static void EmitStore(EmitterContext context, MemoryRegion region)
private static void EmitStore(
EmitterContext context,
MemoryRegion region,
LsSize2 size,
Operand srcA,
int rd,
int offset)
{
OpCodeMemory op = (OpCodeMemory)context.CurrOp;
if (op.Size > IntegerSize.B128)
if (size > LsSize2.B128)
{
context.Config.GpuAccessor.Log($"Invalid store size: {op.Size}.");
context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
return;
}
bool isSmallInt = op.Size < IntegerSize.B32;
bool isSmallInt = size < LsSize2.B32;
int count = 1;
switch (op.Size)
switch (size)
{
case IntegerSize.B64: count = 2; break;
case IntegerSize.B128: count = 4; break;
case LsSize2.B64: count = 2; break;
case LsSize2.B128: count = 4; break;
}
Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));
Operand baseOffset = context.IAdd(srcA, Const(offset));
Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
Operand bitOffset = GetBitOffset(context, baseOffset);
for (int index = 0; index < count; index++)
{
bool isRz = op.Rd.IsRZ;
bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;
Register rd = new Register(isRz ? op.Rd.Index : op.Rd.Index + index, RegisterType.Gpr);
Operand value = Register(rd);
Operand offset = context.IAdd(wordOffset, Const(index));
Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
Operand elemOffset = context.IAdd(wordOffset, Const(index));
if (isSmallInt)
{
@ -576,60 +372,68 @@ namespace Ryujinx.Graphics.Shader.Instructions
switch (region)
{
case MemoryRegion.Local: word = context.LoadLocal (offset); break;
case MemoryRegion.Shared: word = context.LoadShared(offset); break;
case MemoryRegion.Local: word = context.LoadLocal(elemOffset); break;
case MemoryRegion.Shared: word = context.LoadShared(elemOffset); break;
}
value = InsertSmallInt(context, op.Size, bitOffset, word, value);
value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
}
switch (region)
{
case MemoryRegion.Local: context.StoreLocal (offset, value); break;
case MemoryRegion.Shared: context.StoreShared(offset, value); break;
case MemoryRegion.Local: context.StoreLocal(elemOffset, value); break;
case MemoryRegion.Shared: context.StoreShared(elemOffset, value); break;
}
}
}
private static void EmitStoreGlobal(EmitterContext context)
private static void EmitStg(
EmitterContext context,
LsSize2 size,
int ra,
int rd,
int offset,
bool extended)
{
OpCodeMemory op = (OpCodeMemory)context.CurrOp;
if (size > LsSize2.B128)
{
context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
return;
}
bool isSmallInt = op.Size < IntegerSize.B32;
bool isSmallInt = size < LsSize2.B32;
int count = GetVectorCount(op.Size);
int count = GetVectorCount((LsSize)size);
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
Operand bitOffset = GetBitOffset(context, addrLow);
for (int index = 0; index < count; index++)
{
bool isRz = op.Rd.IsRZ;
bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;
Register rd = new Register(isRz ? op.Rd.Index : op.Rd.Index + index, RegisterType.Gpr);
Operand value = Register(rd);
Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
if (isSmallInt)
{
Operand word = context.LoadGlobal(addrLow, addrHigh);
value = InsertSmallInt(context, op.Size, bitOffset, word, value);
value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
}
context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);
}
}
private static int GetVectorCount(IntegerSize size)
private static int GetVectorCount(LsSize size)
{
switch (size)
{
case IntegerSize.B64:
case LsSize.B64:
return 2;
case IntegerSize.B128:
case IntegerSize.UB128:
case LsSize.B128:
case LsSize.UB128:
return 4;
}
@ -642,7 +446,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
bool extended,
int offset)
{
Operand addrLow = GetSrcA(context);
Operand addrLow = Register(ra);
Operand addrHigh;
if (extended && !ra.IsRZ)
@ -678,18 +482,18 @@ namespace Ryujinx.Graphics.Shader.Instructions
private static Operand ExtractSmallInt(
EmitterContext context,
IntegerSize size,
Operand bitOffset,
Operand value)
LsSize size,
Operand bitOffset,
Operand value)
{
value = context.ShiftRightU32(value, bitOffset);
switch (size)
{
case IntegerSize.U8: value = ZeroExtendTo32(context, value, 8); break;
case IntegerSize.U16: value = ZeroExtendTo32(context, value, 16); break;
case IntegerSize.S8: value = SignExtendTo32(context, value, 8); break;
case IntegerSize.S16: value = SignExtendTo32(context, value, 16); break;
case LsSize.U8: value = ZeroExtendTo32(context, value, 8); break;
case LsSize.U16: value = ZeroExtendTo32(context, value, 16); break;
case LsSize.S8: value = SignExtendTo32(context, value, 8); break;
case LsSize.S16: value = SignExtendTo32(context, value, 16); break;
}
return value;
@ -697,21 +501,21 @@ namespace Ryujinx.Graphics.Shader.Instructions
private static Operand InsertSmallInt(
EmitterContext context,
IntegerSize size,
Operand bitOffset,
Operand word,
Operand value)
LsSize size,
Operand bitOffset,
Operand word,
Operand value)
{
switch (size)
{
case IntegerSize.U8:
case IntegerSize.S8:
case LsSize.U8:
case LsSize.S8:
value = context.BitwiseAnd(value, Const(0xff));
value = context.BitfieldInsert(word, value, bitOffset, Const(8));
break;
case IntegerSize.U16:
case IntegerSize.S16:
case LsSize.U16:
case LsSize.S16:
value = context.BitwiseAnd(value, Const(0xffff));
value = context.BitfieldInsert(word, value, bitOffset, Const(16));
break;

View file

@ -9,67 +9,85 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void Mov(EmitterContext context)
public static void MovR(EmitterContext context)
{
context.Copy(GetDest(context), GetSrcB(context));
InstMovR op = context.GetOp<InstMovR>();
context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcA));
}
public static void R2p(EmitterContext context)
public static void MovI(EmitterContext context)
{
OpCodeAlu op = (OpCodeAlu)context.CurrOp;
InstMovI op = context.GetOp<InstMovI>();
bool isCC = op.RawOpCode.Extract(40);
int shift = op.RawOpCode.Extract(41, 2) * 8;
context.Copy(GetDest(op.Dest), GetSrcImm(context, op.Imm20));
}
Operand value = GetSrcA(context);
Operand mask = GetSrcB(context);
public static void MovC(EmitterContext context)
{
InstMovC op = context.GetOp<InstMovC>();
Operand Test(Operand value, int bit)
{
return context.ICompareNotEqual(context.BitwiseAnd(value, Const(1 << bit)), Const(0));
}
context.Copy(GetDest(op.Dest), GetSrcCbuf(context, op.CbufSlot, op.CbufOffset));
}
if (isCC)
{
// TODO: Support Register to condition code flags copy.
context.Config.GpuAccessor.Log("R2P.CC not implemented.");
}
else
{
for (int bit = 0; bit < 7; bit++)
{
Operand pred = Register(bit, RegisterType.Predicate);
public static void Mov32i(EmitterContext context)
{
InstMov32i op = context.GetOp<InstMov32i>();
Operand res = context.ConditionalSelect(Test(mask, bit), Test(value, bit + shift), pred);
context.Copy(GetDest(op.Dest), GetSrcImm(context, op.Imm32));
}
context.Copy(pred, res);
}
}
// R2P, register form: both the value and the mask are read from registers (SrcA and SrcB).
public static void R2pR(EmitterContext context)
{
    var op = context.GetOp<InstR2pR>();

    EmitR2p(context, GetSrcReg(context, op.SrcA), GetSrcReg(context, op.SrcB), op.ByteSel, op.Ccpr);
}
// R2P, immediate form: the value is read from SrcA, the mask from the 20-bit immediate
// (converted with Imm20ToSInt).
public static void R2pI(EmitterContext context)
{
    var op = context.GetOp<InstR2pI>();

    EmitR2p(
        context,
        GetSrcReg(context, op.SrcA),
        GetSrcImm(context, Imm20ToSInt(op.Imm20)),
        op.ByteSel,
        op.Ccpr);
}
// R2P, constant buffer form: the value is read from SrcA, the mask from the constant
// buffer location given by CbufSlot/CbufOffset.
public static void R2pC(EmitterContext context)
{
    var op = context.GetOp<InstR2pC>();

    EmitR2p(
        context,
        GetSrcReg(context, op.SrcA),
        GetSrcCbuf(context, op.CbufSlot, op.CbufOffset),
        op.ByteSel,
        op.Ccpr);
}
public static void S2r(EmitterContext context)
{
// TODO: Better impl.
OpCodeAlu op = (OpCodeAlu)context.CurrOp;
SystemRegister sysReg = (SystemRegister)op.RawOpCode.Extract(20, 8);
InstS2r op = context.GetOp<InstS2r>();
Operand src;
switch (sysReg)
switch (op.SReg)
{
case SystemRegister.LaneId: src = Attribute(AttributeConsts.LaneId); break;
// TODO: Use value from Y direction GPU register.
case SystemRegister.YDirection: src = ConstF(1); break;
case SystemRegister.ThreadKill: src = context.Config.Stage == ShaderStage.Fragment
? Attribute(AttributeConsts.ThreadKill)
: Const(0);
case SReg.LaneId:
src = Attribute(AttributeConsts.LaneId);
break;
case SystemRegister.ThreadId:
{
case SReg.YDirection:
src = ConstF(1); // TODO: Use value from Y direction GPU register.
break;
case SReg.ThreadKill:
src = context.Config.Stage == ShaderStage.Fragment ? Attribute(AttributeConsts.ThreadKill) : Const(0);
break;
case SReg.TId:
Operand tidX = Attribute(AttributeConsts.ThreadIdX);
Operand tidY = Attribute(AttributeConsts.ThreadIdY);
Operand tidZ = Attribute(AttributeConsts.ThreadIdZ);
@ -78,62 +96,115 @@ namespace Ryujinx.Graphics.Shader.Instructions
tidZ = context.ShiftLeft(tidZ, Const(26));
src = context.BitwiseOr(tidX, context.BitwiseOr(tidY, tidZ));
break;
}
case SystemRegister.ThreadIdX: src = Attribute(AttributeConsts.ThreadIdX); break;
case SystemRegister.ThreadIdY: src = Attribute(AttributeConsts.ThreadIdY); break;
case SystemRegister.ThreadIdZ: src = Attribute(AttributeConsts.ThreadIdZ); break;
case SystemRegister.CtaIdX: src = Attribute(AttributeConsts.CtaIdX); break;
case SystemRegister.CtaIdY: src = Attribute(AttributeConsts.CtaIdY); break;
case SystemRegister.CtaIdZ: src = Attribute(AttributeConsts.CtaIdZ); break;
case SystemRegister.EqMask: src = Attribute(AttributeConsts.EqMask); break;
case SystemRegister.LtMask: src = Attribute(AttributeConsts.LtMask); break;
case SystemRegister.LeMask: src = Attribute(AttributeConsts.LeMask); break;
case SystemRegister.GtMask: src = Attribute(AttributeConsts.GtMask); break;
case SystemRegister.GeMask: src = Attribute(AttributeConsts.GeMask); break;
case SReg.TIdX:
src = Attribute(AttributeConsts.ThreadIdX);
break;
case SReg.TIdY:
src = Attribute(AttributeConsts.ThreadIdY);
break;
case SReg.TIdZ:
src = Attribute(AttributeConsts.ThreadIdZ);
break;
default: src = Const(0); break;
case SReg.CtaIdX:
src = Attribute(AttributeConsts.CtaIdX);
break;
case SReg.CtaIdY:
src = Attribute(AttributeConsts.CtaIdY);
break;
case SReg.CtaIdZ:
src = Attribute(AttributeConsts.CtaIdZ);
break;
case SReg.EqMask:
src = Attribute(AttributeConsts.EqMask);
break;
case SReg.LtMask:
src = Attribute(AttributeConsts.LtMask);
break;
case SReg.LeMask:
src = Attribute(AttributeConsts.LeMask);
break;
case SReg.GtMask:
src = Attribute(AttributeConsts.GtMask);
break;
case SReg.GeMask:
src = Attribute(AttributeConsts.GeMask);
break;
default:
src = Const(0);
break;
}
context.Copy(GetDest(context), src);
context.Copy(GetDest(op.Dest), src);
}
public static void Sel(EmitterContext context)
public static void SelR(EmitterContext context)
{
Operand pred = GetPredicate39(context);
InstSelR op = context.GetOp<InstSelR>();
Operand srcA = GetSrcA(context);
Operand srcB = GetSrcB(context);
Operand srcA = GetSrcReg(context, op.SrcA);
Operand srcB = GetSrcReg(context, op.SrcB);
Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
Operand res = context.ConditionalSelect(pred, srcA, srcB);
context.Copy(GetDest(context), res);
EmitSel(context, srcA, srcB, srcPred, op.Dest);
}
public static void Shfl(EmitterContext context)
public static void SelI(EmitterContext context)
{
OpCodeShuffle op = (OpCodeShuffle)context.CurrOp;
InstSelI op = context.GetOp<InstSelI>();
Operand pred = Register(op.Predicate48);
Operand srcA = GetSrcReg(context, op.SrcA);
Operand srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
Operand srcA = GetSrcA(context);
EmitSel(context, srcA, srcB, srcPred, op.Dest);
}
Operand srcB = op.IsBImmediate ? Const(op.ImmediateB) : Register(op.Rb);
Operand srcC = op.IsCImmediate ? Const(op.ImmediateC) : Register(op.Rc);
public static void SelC(EmitterContext context)
{
InstSelC op = context.GetOp<InstSelC>();
(Operand res, Operand valid) = op.ShuffleType switch
Operand srcA = GetSrcReg(context, op.SrcA);
Operand srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitSel(context, srcA, srcB, srcPred, op.Dest);
}
private static void EmitR2p(EmitterContext context, Operand value, Operand mask, ByteSel byteSel, bool ccpr)
{
Operand Test(Operand value, int bit)
{
ShuffleType.Indexed => context.Shuffle(srcA, srcB, srcC),
ShuffleType.Up => context.ShuffleUp(srcA, srcB, srcC),
ShuffleType.Down => context.ShuffleDown(srcA, srcB, srcC),
ShuffleType.Butterfly => context.ShuffleXor(srcA, srcB, srcC),
_ => (null, null)
};
return context.ICompareNotEqual(context.BitwiseAnd(value, Const(1 << bit)), Const(0));
}
context.Copy(GetDest(context), res);
context.Copy(pred, valid);
if (ccpr)
{
// TODO: Support Register to condition code flags copy.
context.Config.GpuAccessor.Log("R2P.CC not implemented.");
}
else
{
int shift = (int)byteSel * 8;
for (int bit = 0; bit < RegisterConsts.PredsCount; bit++)
{
Operand pred = Register(bit, RegisterType.Predicate);
Operand res = context.ConditionalSelect(Test(mask, bit), Test(value, bit + shift), pred);
context.Copy(pred, res);
}
}
}
// Shared SEL implementation: selects srcA or srcB depending on srcPred (via
// ConditionalSelect) and copies the selected value into destination register rd.
private static void EmitSel(EmitterContext context, Operand srcA, Operand srcB, Operand srcPred, int rd)
{
    Operand selected = context.ConditionalSelect(srcPred, srcA, srcB);
    Operand destination = GetDest(rd);

    context.Copy(destination, selected);
}
}
}

View file

@ -0,0 +1,85 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
// RRO, register form: the source operand is read from register SrcB.
public static void RroR(EmitterContext context)
{
    var op = context.GetOp<InstRroR>();

    Operand source = GetSrcReg(context, op.SrcB);

    EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
}
// RRO, immediate form: the source operand is the 20-bit immediate converted by Imm20ToFloat.
public static void RroI(EmitterContext context)
{
    var op = context.GetOp<InstRroI>();

    Operand source = GetSrcImm(context, Imm20ToFloat(op.Imm20));

    EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
}
// RRO, constant buffer form: the source operand is read from CbufSlot/CbufOffset.
public static void RroC(EmitterContext context)
{
    var op = context.GetOp<InstRroC>();

    Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
}
// MUFU: applies the function selected by op.MufuOp to register SrcA and writes the
// result to op.Dest.
//
// The AbsA/NegA modifiers are applied to the source first (FPAbsNeg), and the result is
// passed through FPSaturate together with the op.Sat flag before being copied to the
// destination. For a MufuOp value that has no case below, the (abs/neg adjusted) source
// is forwarded unchanged, and the unhandled operation is now reported through the GPU
// accessor log instead of being ignored silently.
public static void Mufu(EmitterContext context)
{
    InstMufu op = context.GetOp<InstMufu>();

    Operand res = context.FPAbsNeg(GetSrcReg(context, op.SrcA), op.AbsA, op.NegA);

    switch (op.MufuOp)
    {
        case MufuOp.Cos:
            res = context.FPCosine(res);
            break;

        case MufuOp.Sin:
            res = context.FPSine(res);
            break;

        case MufuOp.Ex2:
            res = context.FPExponentB2(res);
            break;

        case MufuOp.Lg2:
            res = context.FPLogarithmB2(res);
            break;

        case MufuOp.Rcp:
            res = context.FPReciprocal(res);
            break;

        case MufuOp.Rsq:
            res = context.FPReciprocalSquareRoot(res);
            break;

        case MufuOp.Sqrt:
            res = context.FPSquareRoot(res);
            break;

        default:
            // TODO: Implement the remaining operations. Until then the value passes
            // through untouched, so make the gap visible in the log.
            context.Config.GpuAccessor.Log($"MUFU operation {op.MufuOp} is not implemented.");
            break;
    }

    context.Copy(GetDest(op.Dest), context.FPSaturate(res, op.Sat));
}
// Shared RRO (range reduction operator) implementation.
// It is translated as a plain move of the abs/neg adjusted source into rd, since it
// should always be followed by a matching MUFU instruction.
private static void EmitRro(EmitterContext context, Operand srcB, int rd, bool absB, bool negB)
{
    Operand adjusted = context.FPAbsNeg(srcB, absB, negB);
    Operand destination = GetDest(rd);

    context.Copy(destination, adjusted);
}
}
}

View file

@ -0,0 +1,15 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
// NOP: the op is still fetched from the context as InstNop, but nothing is emitted for it.
public static void Nop(EmitterContext context)
{
    _ = context.GetOp<InstNop>();
}
}
}

View file

@ -0,0 +1,54 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
// PSET: combines three predicate registers and writes the result to a general register.
//
// Each predicate (Src2Pred, Src1Pred, SrcPred) is passed through BitwiseNot together with
// its own inversion flag. The first two are combined with BoolOpAB, and that result is
// combined with the third using BoolOpC. When BVal is set, the destination receives
// ConstF(1) or Const(0) selected by the result; otherwise it receives the result itself.
public static void Pset(EmitterContext context)
{
    var op = context.GetOp<InstPset>();

    Operand first = context.BitwiseNot(Register(op.Src2Pred, RegisterType.Predicate), op.Src2PredInv);
    Operand second = context.BitwiseNot(Register(op.Src1Pred, RegisterType.Predicate), op.Src1PredInv);
    Operand third = context.BitwiseNot(Register(op.SrcPred, RegisterType.Predicate), op.SrcPredInv);

    Operand combined = GetPredLogicalOp(
        context,
        op.BoolOpC,
        GetPredLogicalOp(context, op.BoolOpAB, first, second),
        third);

    Operand destination = GetDest(op.Dest);

    context.Copy(
        destination,
        op.BVal ? context.ConditionalSelect(combined, ConstF(1), Const(0)) : combined);
}
// PSETP: combines predicate registers and writes two predicate results.
//
// Src2Pred and Src1Pred (each with its inversion flag applied) are combined with
// BoolOpAB. That value and its bitwise negation are then each combined with the third
// predicate (SrcPred/SrcPredInv) using BoolOpC; the former goes to DestPred and the
// latter to DestPredInv.
public static void Psetp(EmitterContext context)
{
    var op = context.GetOp<InstPsetp>();

    Operand lhs = context.BitwiseNot(Register(op.Src2Pred, RegisterType.Predicate), op.Src2PredInv);
    Operand rhs = context.BitwiseNot(Register(op.Src1Pred, RegisterType.Predicate), op.Src1PredInv);

    Operand ab = GetPredLogicalOp(context, op.BoolOpAB, lhs, rhs);
    Operand abInverted = context.BitwiseNot(ab);

    Operand third = GetPredicate(context, op.SrcPred, op.SrcPredInv);

    Operand primary = GetPredLogicalOp(context, op.BoolOpC, ab, third);
    Operand secondary = GetPredLogicalOp(context, op.BoolOpC, abInverted, third);

    context.Copy(Register(op.DestPred, RegisterType.Predicate), primary);
    context.Copy(Register(op.DestPredInv, RegisterType.Predicate), secondary);
}
}
}

View file

@ -0,0 +1,132 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
// SHL, register form: value from SrcA, shift amount from register SrcB.
public static void ShlR(EmitterContext context)
{
    var op = context.GetOp<InstShlR>();

    Operand value = GetSrcReg(context, op.SrcA);
    Operand amount = GetSrcReg(context, op.SrcB);

    EmitShl(context, value, amount, op.Dest, op.M);
}
// SHL, immediate form: value from SrcA, shift amount from the 20-bit immediate
// (converted with Imm20ToSInt).
public static void ShlI(EmitterContext context)
{
    var op = context.GetOp<InstShlI>();

    Operand value = GetSrcReg(context, op.SrcA);
    Operand amount = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitShl(context, value, amount, op.Dest, op.M);
}
// SHL, constant buffer form: value from SrcA, shift amount from CbufSlot/CbufOffset.
public static void ShlC(EmitterContext context)
{
    var op = context.GetOp<InstShlC>();

    Operand value = GetSrcReg(context, op.SrcA);
    Operand amount = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitShl(context, value, amount, op.Dest, op.M);
}
// SHR, register form: value from SrcA, shift amount from register SrcB.
public static void ShrR(EmitterContext context)
{
    InstShrR op = context.GetOp<InstShrR>();

    EmitShr(
        context,
        GetSrcReg(context, op.SrcA),
        GetSrcReg(context, op.SrcB),
        op.Dest,
        op.M,
        op.Brev,
        op.Signed);
}
// SHR, immediate form: value from SrcA, shift amount from the 20-bit immediate
// (converted with Imm20ToSInt).
public static void ShrI(EmitterContext context)
{
    InstShrI op = context.GetOp<InstShrI>();

    EmitShr(
        context,
        GetSrcReg(context, op.SrcA),
        GetSrcImm(context, Imm20ToSInt(op.Imm20)),
        op.Dest,
        op.M,
        op.Brev,
        op.Signed);
}
// SHR, constant buffer form: value from SrcA, shift amount from CbufSlot/CbufOffset.
public static void ShrC(EmitterContext context)
{
    InstShrC op = context.GetOp<InstShrC>();

    EmitShr(
        context,
        GetSrcReg(context, op.SrcA),
        GetSrcCbuf(context, op.CbufSlot, op.CbufOffset),
        op.Dest,
        op.M,
        op.Brev,
        op.Signed);
}
// Shared SHL implementation: shifts srcA left by srcB and stores the result in rd.
//
// With mask set, the shift amount is reduced to its low 5 bits (srcB & 0x1f).
// Without it, the shift amount is clamped: an unsigned amount of 32 or more yields 0.
private static void EmitShl(EmitterContext context, Operand srcA, Operand srcB, int rd, bool mask)
{
    Operand result;

    if (mask)
    {
        result = context.ShiftLeft(srcA, context.BitwiseAnd(srcB, Const(0x1f)));
    }
    else
    {
        Operand shifted = context.ShiftLeft(srcA, srcB);

        // Clamped shift value.
        Operand inRange = context.ICompareLessUnsigned(srcB, Const(32));

        result = context.ConditionalSelect(inRange, shifted, Const(0));
    }

    // TODO: X, CC.

    context.Copy(GetDest(rd), result);
}
// Shared SHR implementation: shifts srcA right by srcB and stores the result in rd.
//
// bitReverse - bit-reverse srcA (BitfieldReverse) before shifting.
// mask       - reduce the shift amount to its low 5 bits (srcB & 0x1f); when false the
//              amount is clamped instead, see below.
// isSigned   - use an arithmetic (S32) shift rather than a logical (U32) one.
private static void EmitShr(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    int rd,
    bool mask,
    bool bitReverse,
    bool isSigned)
{
    Operand source = bitReverse ? context.BitfieldReverse(srcA) : srcA;
    Operand amount = mask ? context.BitwiseAnd(srcB, Const(0x1f)) : srcB;

    Operand result = isSigned
        ? context.ShiftRightS32(source, amount)
        : context.ShiftRightU32(source, amount);

    if (!mask)
    {
        // Clamped shift value: for an unsigned amount of 32 or more, the result is the
        // source shifted right by 31 when signed, or 0 when unsigned.
        Operand overflowResult = isSigned
            ? context.ShiftRightS32(source, Const(31))
            : Const(0);

        Operand inRange = context.ICompareLessUnsigned(amount, Const(32));

        result = context.ConditionalSelect(inRange, result, overflowResult);
    }

    // TODO: X, CC.

    context.Copy(GetDest(rd), result);
}
}
}

View file

@ -0,0 +1,776 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
// SUATOM, bindless form (InstSuatomB): forwards the decoded fields to EmitSuatom with
// isBindless set, no immediate (imm = 0) and SrcC supplied from the instruction.
public static void SuatomB(EmitterContext context)
{
    var op = context.GetOp<InstSuatomB>();

    EmitSuatom(
        context,
        dimensions: op.Dim,
        atomicOp: op.Op,
        size: op.Size,
        imm: 0,
        srcA: op.SrcA,
        srcB: op.SrcB,
        srcC: op.SrcC,
        dest: op.Dest,
        byteAddress: op.Ba,
        isBindless: true,
        compareAndSwap: false);
}
// SUATOM, non-bindless form (InstSuatom): forwards the decoded fields to EmitSuatom with
// TidB as the immediate and no SrcC register (srcC = 0).
public static void Suatom(EmitterContext context)
{
    var op = context.GetOp<InstSuatom>();

    EmitSuatom(
        context,
        dimensions: op.Dim,
        atomicOp: op.Op,
        size: op.Size,
        imm: op.TidB,
        srcA: op.SrcA,
        srcB: op.SrcB,
        srcC: 0,
        dest: op.Dest,
        byteAddress: op.Ba,
        isBindless: false,
        compareAndSwap: false);
}
// SUATOM, second bindless encoding (InstSuatomB2): forwards the decoded fields to
// EmitSuatom with isBindless set, no immediate (imm = 0) and SrcC from the instruction.
public static void SuatomB2(EmitterContext context)
{
    var op = context.GetOp<InstSuatomB2>();

    EmitSuatom(
        context,
        dimensions: op.Dim,
        atomicOp: op.Op,
        size: op.Size,
        imm: 0,
        srcA: op.SrcA,
        srcB: op.SrcB,
        srcC: op.SrcC,
        dest: op.Dest,
        byteAddress: op.Ba,
        isBindless: true,
        compareAndSwap: false);
}
// SUATOM compare-and-swap, bindless form (InstSuatomCasB): forwards the decoded fields to
// EmitSuatom with compareAndSwap and isBindless set. The atomic op argument is passed as
// 0 because this encoding supplies none.
public static void SuatomCasB(EmitterContext context)
{
    var op = context.GetOp<InstSuatomCasB>();

    EmitSuatom(
        context,
        dimensions: op.Dim,
        atomicOp: 0,
        size: op.Size,
        imm: 0,
        srcA: op.SrcA,
        srcB: op.SrcB,
        srcC: op.SrcC,
        dest: op.Dest,
        byteAddress: op.Ba,
        isBindless: true,
        compareAndSwap: true);
}
// SUATOM compare-and-swap, non-bindless form (InstSuatomCas): forwards the decoded fields
// to EmitSuatom with compareAndSwap set, TidB as the immediate and no SrcC (srcC = 0).
// The atomic op argument is passed as 0 because this encoding supplies none.
public static void SuatomCas(EmitterContext context)
{
    var op = context.GetOp<InstSuatomCas>();

    EmitSuatom(
        context,
        dimensions: op.Dim,
        atomicOp: 0,
        size: op.Size,
        imm: op.TidB,
        srcA: op.SrcA,
        srcB: op.SrcB,
        srcC: 0,
        dest: op.Dest,
        byteAddress: op.Ba,
        isBindless: false,
        compareAndSwap: true);
}
// SULD.D, bindless form (InstSuldDB): forwards Dim, Size, SrcA, Dest, SrcC and Ba to
// EmitSuld with useComponents cleared and isBindless set.
public static void SuldDB(EmitterContext context)
{
    var op = context.GetOp<InstSuldDB>();

    EmitSuld(
        context,
        op.Dim,
        op.Size,
        0,
        0,
        op.SrcA,
        op.Dest,
        op.SrcC,
        useComponents: false,
        op.Ba,
        isBindless: true);
}
// SULD.D, non-bindless form (InstSuldD): forwards Dim, Size, TidB, SrcA, Dest and Ba to
// EmitSuld with useComponents and isBindless both cleared.
public static void SuldD(EmitterContext context)
{
    var op = context.GetOp<InstSuldD>();

    EmitSuld(
        context,
        op.Dim,
        op.Size,
        op.TidB,
        0,
        op.SrcA,
        op.Dest,
        0,
        useComponents: false,
        op.Ba,
        isBindless: false);
}
// SULD with component mask, bindless form (InstSuldB): forwards Dim, Rgba, SrcA, Dest and
// SrcC to EmitSuld with useComponents and isBindless both set.
public static void SuldB(EmitterContext context)
{
    InstSuldB op = context.GetOp<InstSuldB>();

    // The srcC argument must carry op.SrcC rather than a literal 0, matching every other
    // bindless variant here (SuldDB, SustB, SustDB, SuredB, SuatomB). EmitSuatom reads the
    // bindless source from srcC; EmitSuld presumably does the same - TODO confirm.
    EmitSuld(
        context,
        op.Dim,
        0,
        0,
        op.Rgba,
        op.SrcA,
        op.Dest,
        op.SrcC,
        useComponents: true,
        false,
        isBindless: true);
}
// SULD with component mask, non-bindless form (InstSuld): forwards Dim, TidB, Rgba, SrcA
// and Dest to EmitSuld with useComponents set and isBindless cleared.
public static void Suld(EmitterContext context)
{
    var op = context.GetOp<InstSuld>();

    EmitSuld(
        context,
        op.Dim,
        0,
        op.TidB,
        op.Rgba,
        op.SrcA,
        op.Dest,
        0,
        useComponents: true,
        false,
        isBindless: false);
}
// SURED, bindless form (InstSuredB): forwards Dim, Op, Size, SrcA, Dest, SrcC and Ba to
// EmitSured with isBindless set.
public static void SuredB(EmitterContext context)
{
    var op = context.GetOp<InstSuredB>();

    EmitSured(
        context,
        op.Dim,
        op.Op,
        op.Size,
        0,
        op.SrcA,
        op.Dest,
        op.SrcC,
        op.Ba,
        isBindless: true);
}
// SURED, non-bindless form (InstSured): forwards Dim, Op, Size, TidB, SrcA, Dest and Ba
// to EmitSured with isBindless cleared.
public static void Sured(EmitterContext context)
{
    var op = context.GetOp<InstSured>();

    EmitSured(
        context,
        op.Dim,
        op.Op,
        op.Size,
        op.TidB,
        op.SrcA,
        op.Dest,
        0,
        op.Ba,
        isBindless: false);
}
// SUST.D, bindless form (InstSustDB): forwards Dim, Size, SrcA, Dest, SrcC and Ba to
// EmitSust with useComponents cleared and isBindless set.
public static void SustDB(EmitterContext context)
{
    var op = context.GetOp<InstSustDB>();

    EmitSust(
        context,
        op.Dim,
        op.Size,
        0,
        0,
        op.SrcA,
        op.Dest,
        op.SrcC,
        useComponents: false,
        op.Ba,
        isBindless: true);
}
// SUST.D, non-bindless form (InstSustD): forwards Dim, Size, TidB, SrcA, Dest and Ba to
// EmitSust with useComponents and isBindless both cleared.
public static void SustD(EmitterContext context)
{
    var op = context.GetOp<InstSustD>();

    EmitSust(
        context,
        op.Dim,
        op.Size,
        op.TidB,
        0,
        op.SrcA,
        op.Dest,
        0,
        useComponents: false,
        op.Ba,
        isBindless: false);
}
// SUST with component mask, bindless form (InstSustB): forwards Dim, Rgba, SrcA, Dest and
// SrcC to EmitSust with useComponents and isBindless both set.
public static void SustB(EmitterContext context)
{
    var op = context.GetOp<InstSustB>();

    EmitSust(
        context,
        op.Dim,
        0,
        0,
        op.Rgba,
        op.SrcA,
        op.Dest,
        op.SrcC,
        useComponents: true,
        false,
        isBindless: true);
}
// SUST with component mask, non-bindless form (InstSust): forwards Dim, TidB, Rgba, SrcA
// and Dest to EmitSust with useComponents set and isBindless cleared.
public static void Sust(EmitterContext context)
{
    var op = context.GetOp<InstSust>();

    EmitSust(
        context,
        op.Dim,
        0,
        op.TidB,
        op.Rgba,
        op.SrcA,
        op.Dest,
        0,
        useComponents: true,
        false,
        isBindless: false);
}
/// <summary>
/// Builds the source operand list for a surface atomic instruction and adds a single
/// ImageAtomic texture operation to the context.
/// </summary>
/// <param name="context">Emitter context that receives the generated operations.</param>
/// <param name="dimensions">Surface dimensionality; converted with ConvertSamplerType.</param>
/// <param name="atomicOp">Atomic operation to perform; ignored when compareAndSwap is true.</param>
/// <param name="size">Element size/type; selects the byte-address shift and, in most cases, the format.</param>
/// <param name="imm">Handle value passed to the texture operation; also used for the format lookup in the non-bindless Sd32/Sd64 case.</param>
/// <param name="srcA">Index of the first general purpose register holding coordinates; later coordinates come from the following registers.</param>
/// <param name="srcB">Index of the first general purpose register holding the value operand(s).</param>
/// <param name="srcC">Index of the register read as the first source when isBindless is true.</param>
/// <param name="dest">Index of the general purpose register that receives the result.</param>
/// <param name="byteAddress">When true, the first coordinate is shifted right by the log2 of the component size.</param>
/// <param name="isBindless">When true, the srcC register is prepended to the sources and the Bindless flag is set.</param>
/// <param name="compareAndSwap">When true, the CAS flag is used and two values are read from srcB onwards instead of one.</param>
private static void EmitSuatom(
EmitterContext context,
SuDim dimensions,
SuatomOp atomicOp,
SuatomSize size,
int imm,
int srcA,
int srcB,
int srcC,
int dest,
bool byteAddress,
bool isBindless,
bool compareAndSwap)
{
SamplerType type = ConvertSamplerType(dimensions);
if (type == SamplerType.None)
{
// Unsupported dimension: log and emit nothing for this instruction.
context.Config.GpuAccessor.Log("Invalid image atomic sampler type.");
return;
}
// Returns a copy of the next coordinate register and advances srcA.
// Once srcA has moved past the zero register index, a constant 0 is returned instead.
Operand Ra()
{
if (srcA > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcA++, RegisterType.Gpr));
}
// Same as Ra, but for the value registers starting at srcB.
Operand Rb()
{
if (srcB > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcB++, RegisterType.Gpr));
}
// Returns the destination register operand (no copy is made) and advances dest.
Operand GetDest()
{
if (dest > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return Register(dest++, RegisterType.Gpr);
}
List<Operand> sourcesList = new List<Operand>();
if (isBindless)
{
// For bindless access the handle register is the first source.
sourcesList.Add(context.Copy(GetSrcReg(context, srcC)));
}
int coordsCount = type.GetDimensions();
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(Ra());
}
if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
{
// 1D surfaces are accessed as 2D here: add a constant 0 second coordinate
// and replace the dimension bits of the type with Texture2D.
sourcesList.Add(Const(0));
type &= ~SamplerType.Mask;
type |= SamplerType.Texture2D;
}
if (type.HasFlag(SamplerType.Array))
{
// One extra register is read for array types (presumably the layer index - confirm).
sourcesList.Add(Ra());
// The Array flag is already set at this point, so this OR leaves type unchanged.
type |= SamplerType.Array;
}
if (byteAddress)
{
// The first coordinate sits after the bindless handle when one is present.
int xIndex = isBindless ? 1 : 0;
// Presumably converts a byte offset into an element index - confirm.
sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
}
// TODO: FP and 64-bit formats.
// Non-bindless Sd32/Sd64 accesses look the format up from the config using imm;
// every other case derives the format from the size alone.
TextureFormat format = !isBindless && (size == SuatomSize.Sd32 || size == SuatomSize.Sd64)
? context.Config.GetTextureFormatAtomic(imm)
: GetTextureFormat(size);
if (compareAndSwap)
{
// Compare-and-swap reads two consecutive value registers; this is the first of them.
sourcesList.Add(Rb());
}
sourcesList.Add(Rb());
Operand[] sources = sourcesList.ToArray();
TextureFlags flags = compareAndSwap ? TextureFlags.CAS : GetAtomicOpFlags(atomicOp);
if (isBindless)
{
flags |= TextureFlags.Bindless;
}
TextureOperation operation = context.CreateTextureOperation(
Instruction.ImageAtomic,
type,
format,
flags,
imm,
0,
GetDest(),
sources);
context.Add(operation);
}
/// <summary>
/// Emits one ImageLoad texture operation per loaded component for a surface load instruction.
/// </summary>
/// <param name="context">Emitter context that receives the generated operations.</param>
/// <param name="dimensions">Surface dimensionality; converted with ConvertSamplerType.</param>
/// <param name="size">Element size; only used when <paramref name="useComponents"/> is false.</param>
/// <param name="imm">Handle value passed to the texture operations.</param>
/// <param name="componentMask">Mask of components to load; only used when <paramref name="useComponents"/> is true.</param>
/// <param name="srcA">Index of the first general purpose register holding coordinates.</param>
/// <param name="srcB">Index of the first general purpose register that receives loaded data.</param>
/// <param name="srcC">Index of the register read as the first source when <paramref name="isBindless"/> is true.</param>
/// <param name="useComponents">Selects the component-mask path (true) or the size-based path (false).</param>
/// <param name="byteAddress">On the size-based path, shifts the first coordinate right by the log2 of the component size.</param>
/// <param name="isBindless">When true, the srcC register is prepended to the sources and the Bindless flag is set.</param>
private static void EmitSuld(
    EmitterContext context,
    SuDim dimensions,
    SuSize size,
    int imm,
    SuRgba componentMask,
    int srcA,
    int srcB,
    int srcC,
    bool useComponents,
    bool byteAddress,
    bool isBindless)
{
    context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);

    SamplerType type = ConvertSamplerType(dimensions);

    if (type == SamplerType.None)
    {
        // This is the load emitter; the message previously said "store",
        // which made the log indistinguishable from the one in EmitSust.
        context.Config.GpuAccessor.Log("Invalid image load sampler type.");
        return;
    }

    // Returns a copy of the next coordinate register and advances srcA.
    // Once srcA has moved past the zero register index, a constant 0 is returned instead.
    Operand Ra()
    {
        if (srcA > RegisterConsts.RegisterZeroIndex)
        {
            return Const(0);
        }

        return context.Copy(Register(srcA++, RegisterType.Gpr));
    }

    List<Operand> sourcesList = new List<Operand>();

    if (isBindless)
    {
        // For bindless access the handle register is the first source.
        sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
    }

    int coordsCount = type.GetDimensions();

    for (int index = 0; index < coordsCount; index++)
    {
        sourcesList.Add(Ra());
    }

    if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
    {
        // 1D surfaces are accessed as 2D here: add a constant 0 second coordinate
        // and replace the dimension bits of the type with Texture2D.
        sourcesList.Add(Const(0));

        type &= ~SamplerType.Mask;
        type |= SamplerType.Texture2D;
    }

    if (type.HasFlag(SamplerType.Array))
    {
        // One extra register is read for array types.
        sourcesList.Add(Ra());
    }

    Operand[] sources = sourcesList.ToArray();

    int handle = imm;

    TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;

    if (useComponents)
    {
        // Walk the mask bit by bit; every set bit loads that component into the
        // next destination register.
        for (int compMask = (int)componentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
        {
            if ((compMask & 1) == 0)
            {
                continue;
            }

            // Stop as soon as the destination reaches the zero register.
            if (srcB == RegisterConsts.RegisterZeroIndex)
            {
                break;
            }

            Operand rd = Register(srcB++, RegisterType.Gpr);

            TextureOperation operation = context.CreateTextureOperation(
                Instruction.ImageLoad,
                type,
                flags,
                handle,
                compIndex,
                rd,
                sources);

            if (!isBindless)
            {
                // The format can only be looked up by handle for bound images.
                operation.Format = context.Config.GetTextureFormat(handle);
            }

            context.Add(operation);
        }
    }
    else
    {
        if (byteAddress)
        {
            // The first coordinate sits after the bindless handle when one is present.
            int xIndex = isBindless ? 1 : 0;

            sources[xIndex] = context.ShiftRightS32(sources[xIndex], Const(GetComponentSizeInBytesLog2(size)));
        }

        int components = GetComponents(size);

        for (int compIndex = 0; compIndex < components; compIndex++)
        {
            // Stop as soon as the destination reaches the zero register.
            if (srcB == RegisterConsts.RegisterZeroIndex)
            {
                break;
            }

            Operand rd = Register(srcB++, RegisterType.Gpr);

            TextureOperation operation = context.CreateTextureOperation(
                Instruction.ImageLoad,
                type,
                GetTextureFormat(size),
                flags,
                handle,
                compIndex,
                rd,
                sources);

            context.Add(operation);

            // Sub-word sizes are widened to 32 bits in place after the load.
            switch (size)
            {
                case SuSize.U8: context.Copy(rd, ZeroExtendTo32(context, rd, 8)); break;
                case SuSize.U16: context.Copy(rd, ZeroExtendTo32(context, rd, 16)); break;
                case SuSize.S8: context.Copy(rd, SignExtendTo32(context, rd, 8)); break;
                case SuSize.S16: context.Copy(rd, SignExtendTo32(context, rd, 16)); break;
            }
        }
    }
}
/// <summary>
/// Builds the source operand list for a surface reduction instruction and adds a single
/// ImageAtomic texture operation with a null destination operand to the context.
/// </summary>
/// <param name="context">Emitter context that receives the generated operations.</param>
/// <param name="dimensions">Surface dimensionality; converted with ConvertSamplerType.</param>
/// <param name="atomicOp">Reduction operation; cast to SuatomOp to obtain the texture flags.</param>
/// <param name="size">Element size/type; selects the byte-address shift and, in most cases, the format.</param>
/// <param name="imm">Handle value passed to the texture operation; also used for the format lookup in the non-bindless Sd32/Sd64 case.</param>
/// <param name="srcA">Index of the first general purpose register holding coordinates.</param>
/// <param name="srcB">Index of the general purpose register holding the value operand.</param>
/// <param name="srcC">Index of the register read as the first source when isBindless is true.</param>
/// <param name="byteAddress">When true, the first coordinate is shifted right by the log2 of the component size.</param>
/// <param name="isBindless">When true, the srcC register is prepended to the sources and the Bindless flag is set.</param>
private static void EmitSured(
EmitterContext context,
SuDim dimensions,
RedOp atomicOp,
SuatomSize size,
int imm,
int srcA,
int srcB,
int srcC,
bool byteAddress,
bool isBindless)
{
SamplerType type = ConvertSamplerType(dimensions);
if (type == SamplerType.None)
{
// Unsupported dimension: log and emit nothing for this instruction.
context.Config.GpuAccessor.Log("Invalid image reduction sampler type.");
return;
}
// Returns a copy of the next coordinate register and advances srcA.
// Once srcA has moved past the zero register index, a constant 0 is returned instead.
Operand Ra()
{
if (srcA > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcA++, RegisterType.Gpr));
}
// Same as Ra, but for the value register at srcB.
Operand Rb()
{
if (srcB > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcB++, RegisterType.Gpr));
}
List<Operand> sourcesList = new List<Operand>();
if (isBindless)
{
// For bindless access the handle register is the first source.
sourcesList.Add(context.Copy(GetSrcReg(context, srcC)));
}
int coordsCount = type.GetDimensions();
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(Ra());
}
if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
{
// 1D surfaces are accessed as 2D here: add a constant 0 second coordinate
// and replace the dimension bits of the type with Texture2D.
sourcesList.Add(Const(0));
type &= ~SamplerType.Mask;
type |= SamplerType.Texture2D;
}
if (type.HasFlag(SamplerType.Array))
{
// One extra register is read for array types (presumably the layer index - confirm).
sourcesList.Add(Ra());
// The Array flag is already set at this point, so this OR leaves type unchanged.
type |= SamplerType.Array;
}
if (byteAddress)
{
// The first coordinate sits after the bindless handle when one is present.
int xIndex = isBindless ? 1 : 0;
// Presumably converts a byte offset into an element index - confirm.
sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
}
// TODO: FP and 64-bit formats.
// Non-bindless Sd32/Sd64 accesses look the format up from the config using imm;
// every other case derives the format from the size alone.
TextureFormat format = !isBindless && (size == SuatomSize.Sd32 || size == SuatomSize.Sd64)
? context.Config.GetTextureFormatAtomic(imm)
: GetTextureFormat(size);
sourcesList.Add(Rb());
Operand[] sources = sourcesList.ToArray();
TextureFlags flags = GetAtomicOpFlags((SuatomOp)atomicOp);
if (isBindless)
{
flags |= TextureFlags.Bindless;
}
// null is passed in the argument slot where EmitSuatom passes its destination register.
TextureOperation operation = context.CreateTextureOperation(
Instruction.ImageAtomic,
type,
format,
flags,
imm,
0,
null,
sources);
context.Add(operation);
}
/// <summary>
/// Builds the source operand list for a surface store instruction and adds a single
/// ImageStore texture operation with a null destination operand to the context.
/// </summary>
/// <param name="context">Emitter context that receives the generated operations.</param>
/// <param name="dimensions">Surface dimensionality; converted with ConvertSamplerType.</param>
/// <param name="size">Element size; only used when useComponents is false.</param>
/// <param name="imm">Handle value passed to the texture operation.</param>
/// <param name="componentMask">Mask of components to store; only used when useComponents is true.</param>
/// <param name="srcA">Index of the first general purpose register holding coordinates.</param>
/// <param name="srcB">Index of the first general purpose register holding the data to store.</param>
/// <param name="srcC">Index of the register read as the first source when isBindless is true.</param>
/// <param name="useComponents">Selects the component-mask path (true) or the size-based path (false).</param>
/// <param name="byteAddress">On the size-based path, shifts the first coordinate right by the log2 of the component size.</param>
/// <param name="isBindless">When true, the srcC register is prepended to the sources and the Bindless flag is set.</param>
private static void EmitSust(
EmitterContext context,
SuDim dimensions,
SuSize size,
int imm,
SuRgba componentMask,
int srcA,
int srcB,
int srcC,
bool useComponents,
bool byteAddress,
bool isBindless)
{
SamplerType type = ConvertSamplerType(dimensions);
if (type == SamplerType.None)
{
// Unsupported dimension: log and emit nothing for this instruction.
context.Config.GpuAccessor.Log("Invalid image store sampler type.");
return;
}
// Returns a copy of the next coordinate register and advances srcA.
// Once srcA has moved past the zero register index, a constant 0 is returned instead.
Operand Ra()
{
if (srcA > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcA++, RegisterType.Gpr));
}
// Same as Ra, but for the data registers starting at srcB.
Operand Rb()
{
if (srcB > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcB++, RegisterType.Gpr));
}
List<Operand> sourcesList = new List<Operand>();
if (isBindless)
{
// For bindless access the handle register is the first source.
sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
}
int coordsCount = type.GetDimensions();
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(Ra());
}
if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
{
// 1D surfaces are accessed as 2D here: add a constant 0 second coordinate
// and replace the dimension bits of the type with Texture2D.
sourcesList.Add(Const(0));
type &= ~SamplerType.Mask;
type |= SamplerType.Texture2D;
}
if (type.HasFlag(SamplerType.Array))
{
// One extra register is read for array types (presumably the layer index - confirm).
sourcesList.Add(Ra());
}
// Stays Unknown on the component-mask path when the access is bindless.
TextureFormat format = TextureFormat.Unknown;
if (useComponents)
{
// One data register is appended for every set bit in the component mask.
for (int compMask = (int)componentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
{
if ((compMask & 1) != 0)
{
sourcesList.Add(Rb());
}
}
if (!isBindless)
{
format = context.Config.GetTextureFormat(imm);
}
}
else
{
if (byteAddress)
{
// The first coordinate sits after the bindless handle when one is present.
int xIndex = isBindless ? 1 : 0;
// Presumably converts a byte offset into an element index - confirm.
sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
}
// The number of data registers appended is derived from the element size.
int components = GetComponents(size);
for (int compIndex = 0; compIndex < components; compIndex++)
{
sourcesList.Add(Rb());
}
format = GetTextureFormat(size);
}
Operand[] sources = sourcesList.ToArray();
int handle = imm;
TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;
TextureOperation operation = context.CreateTextureOperation(
Instruction.ImageStore,
type,
format,
flags,
handle,
0,
null,
sources);
context.Add(operation);
}
/// <summary>
/// Maps an atomic element size to the shift amount used for byte addressing.
/// </summary>
/// <param name="size">Atomic element size.</param>
/// <returns>3 for the 64-bit sizes (U64, S64, Sd64); 2 for everything else.</returns>
private static int GetComponentSizeInBytesLog2(SuatomSize size)
{
    switch (size)
    {
        case SuatomSize.U64:
        case SuatomSize.S64:
        case SuatomSize.Sd64:
            return 3;

        default:
            // U32, S32, F32FtzRn, F16x2FtzRn, Sd32 and any unlisted value.
            return 2;
    }
}
/// <summary>
/// Maps an atomic element size to the texture format used for the image access.
/// </summary>
/// <param name="size">Atomic element size.</param>
/// <returns>The matching format; R32Uint for U32, Sd32 and any unlisted value.</returns>
private static TextureFormat GetTextureFormat(SuatomSize size)
{
    switch (size)
    {
        case SuatomSize.S32:
            return TextureFormat.R32Sint;

        case SuatomSize.U64:
        case SuatomSize.S64:
        case SuatomSize.Sd64:
            return TextureFormat.R32G32Uint;

        case SuatomSize.F32FtzRn:
            return TextureFormat.R32Float;

        case SuatomSize.F16x2FtzRn:
            return TextureFormat.R16G16Float;

        default:
            return TextureFormat.R32Uint;
    }
}
/// <summary>
/// Translates a surface atomic operation into the texture flag that selects it.
/// </summary>
/// <param name="op">Atomic operation from the instruction.</param>
/// <returns>The corresponding flag; Add for SuatomOp.Add and any unlisted value.</returns>
private static TextureFlags GetAtomicOpFlags(SuatomOp op)
{
    switch (op)
    {
        case SuatomOp.Min: return TextureFlags.Minimum;
        case SuatomOp.Max: return TextureFlags.Maximum;
        case SuatomOp.Inc: return TextureFlags.Increment;
        case SuatomOp.Dec: return TextureFlags.Decrement;
        case SuatomOp.And: return TextureFlags.BitwiseAnd;
        case SuatomOp.Or: return TextureFlags.BitwiseOr;
        case SuatomOp.Xor: return TextureFlags.BitwiseXor;
        case SuatomOp.Exch: return TextureFlags.Swap;
        default: return TextureFlags.Add;
    }
}
/// <summary>
/// Gets how many registers a surface access of the given size covers.
/// </summary>
/// <param name="size">Surface element size.</param>
/// <returns>2 for B64, 4 for B128 and UB128, 1 otherwise.</returns>
private static int GetComponents(SuSize size)
{
    switch (size)
    {
        case SuSize.B64:
            return 2;

        case SuSize.B128:
        case SuSize.UB128:
            return 4;

        default:
            return 1;
    }
}
/// <summary>
/// Maps a surface element size to the shift amount used for byte addressing.
/// </summary>
/// <param name="size">Surface element size.</param>
/// <returns>0 for 8-bit, 1 for 16-bit, 3 for B64, 4 for B128/UB128, and 2 for B32 or any unlisted value.</returns>
private static int GetComponentSizeInBytesLog2(SuSize size)
{
    switch (size)
    {
        case SuSize.U8:
        case SuSize.S8:
            return 0;

        case SuSize.U16:
        case SuSize.S16:
            return 1;

        case SuSize.B64:
            return 3;

        case SuSize.B128:
        case SuSize.UB128:
            return 4;

        default:
            return 2;
    }
}
/// <summary>
/// Maps a surface element size to the texture format used for the image access.
/// </summary>
/// <param name="size">Surface element size.</param>
/// <returns>The matching format; R32Uint for B32 and any unlisted value.</returns>
private static TextureFormat GetTextureFormat(SuSize size)
{
    switch (size)
    {
        case SuSize.U8: return TextureFormat.R8Uint;
        case SuSize.S8: return TextureFormat.R8Sint;
        case SuSize.U16: return TextureFormat.R16Uint;
        case SuSize.S16: return TextureFormat.R16Sint;
        case SuSize.B64: return TextureFormat.R32G32Uint;

        case SuSize.B128:
        case SuSize.UB128:
            return TextureFormat.R32G32B32A32Uint;

        default:
            return TextureFormat.R32Uint;
    }
}
/// <summary>
/// Converts a surface dimension from the instruction encoding into a sampler type.
/// </summary>
/// <param name="target">Surface dimension.</param>
/// <returns>The sampler type, or SamplerType.None when the dimension is not handled.</returns>
private static SamplerType ConvertSamplerType(SuDim target)
{
    switch (target)
    {
        case SuDim._1d: return SamplerType.Texture1D;
        case SuDim._1dBuffer: return SamplerType.TextureBuffer;
        case SuDim._1dArray: return SamplerType.Texture1D | SamplerType.Array;
        case SuDim._2d: return SamplerType.Texture2D;
        case SuDim._2dArray: return SamplerType.Texture2D | SamplerType.Array;
        case SuDim._3d: return SamplerType.Texture3D;
        default: return SamplerType.None;
    }
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,18 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Placeholder handling of the Vmad instruction: copies the SrcC register to the destination.
        /// </summary>
        /// <param name="context">Emitter context holding the instruction being translated.</param>
        public static void Vmad(EmitterContext context)
        {
            InstVmad op = context.GetOp<InstVmad>();

            // TODO: Implement properly.
            var destination = GetDest(op.Dest);
            var passthrough = GetSrcReg(context, op.SrcC);

            context.Copy(destination, passthrough);
        }
    }
}

View file

@ -9,47 +9,46 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void Vmad(EmitterContext context)
{
// TODO: Implement properly.
context.Copy(GetDest(context), GetSrcC(context));
}
public static void Vmnmx(EmitterContext context)
{
OpCodeVideo op = (OpCodeVideo)context.CurrOp;
InstVmnmx op = context.GetOp<InstVmnmx>();
bool max = op.RawOpCode.Extract(56);
Operand srcA = Extend(context, GetSrcA(context), op.RaSelection, op.RaType);
Operand srcC = GetSrcC(context);
Operand srcA = Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
Operand srcC = GetSrcReg(context, op.SrcC);
Operand srcB;
if (op.HasRb)
if (op.BVideo)
{
srcB = Extend(context, Register(op.Rb), op.RbSelection, op.RbType);
srcB = Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
}
else
{
srcB = Const(op.Immediate);
int imm = op.Imm16;
if ((op.BSelect & VectorSelect.S8B0) != 0)
{
imm = (imm << 16) >> 16;
}
srcB = Const(imm);
}
Operand res;
bool resSigned;
if ((op.RaType & VideoType.Signed) != (op.RbType & VideoType.Signed))
if ((op.ASelect & VectorSelect.S8B0) != (op.BSelect & VectorSelect.S8B0))
{
// Signedness is different, but for max, result will always fit a U32,
// since one of the inputs can't be negative, and the result is the one
// with highest value. For min, it will always fit on a S32, since
// one of the input can't be greater than INT_MAX and we want the lowest value.
resSigned = !max;
resSigned = !op.Mn;
res = max ? context.IMaximumU32(srcA, srcB) : context.IMinimumS32(srcA, srcB);
res = op.Mn ? context.IMaximumU32(srcA, srcB) : context.IMinimumS32(srcA, srcB);
if ((op.RaType & VideoType.Signed) != 0)
if ((op.ASelect & VectorSelect.S8B0) != 0)
{
Operand isBGtIntMax = context.ICompareLess(srcB, Const(0));
@ -65,9 +64,9 @@ namespace Ryujinx.Graphics.Shader.Instructions
else
{
// Ra and Rb have the same signedness, so doesn't matter which one we test.
resSigned = (op.RaType & VideoType.Signed) != 0;
resSigned = (op.ASelect & VectorSelect.S8B0) != 0;
if (max)
if (op.Mn)
{
res = resSigned
? context.IMaximumS32(srcA, srcB)
@ -81,54 +80,62 @@ namespace Ryujinx.Graphics.Shader.Instructions
}
}
if (op.Saturate)
if (op.Sat)
{
if (op.DstSigned && !resSigned)
if (op.DFormat && !resSigned)
{
res = context.IMinimumU32(res, Const(int.MaxValue));
}
else if (!op.DstSigned && resSigned)
else if (!op.DFormat && resSigned)
{
res = context.IMaximumS32(res, Const(0));
}
}
switch (op.PostOp)
switch (op.VideoOp)
{
case VideoPostOp.Acc:
case VideoOp.Acc:
res = context.IAdd(res, srcC);
break;
case VideoPostOp.Max:
res = op.DstSigned ? context.IMaximumS32(res, srcC) : context.IMaximumU32(res, srcC);
case VideoOp.Max:
res = op.DFormat ? context.IMaximumS32(res, srcC) : context.IMaximumU32(res, srcC);
break;
case VideoPostOp.Min:
res = op.DstSigned ? context.IMinimumS32(res, srcC) : context.IMinimumU32(res, srcC);
case VideoOp.Min:
res = op.DFormat ? context.IMinimumS32(res, srcC) : context.IMinimumU32(res, srcC);
break;
case VideoPostOp.Mrg16h:
case VideoOp.Mrg16h:
res = context.BitfieldInsert(srcC, res, Const(16), Const(16));
break;
case VideoPostOp.Mrg16l:
case VideoOp.Mrg16l:
res = context.BitfieldInsert(srcC, res, Const(0), Const(16));
break;
case VideoPostOp.Mrg8b0:
case VideoOp.Mrg8b0:
res = context.BitfieldInsert(srcC, res, Const(0), Const(8));
break;
case VideoPostOp.Mrg8b2:
case VideoOp.Mrg8b2:
res = context.BitfieldInsert(srcC, res, Const(16), Const(8));
break;
}
context.Copy(GetDest(context), res);
context.Copy(GetDest(op.Dest), res);
}
private static Operand Extend(EmitterContext context, Operand src, int sel, VideoType type)
private static Operand Extend(EmitterContext context, Operand src, VectorSelect type)
{
return type switch
{
VideoType.U8 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(sel * 8)), 8),
VideoType.U16 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(sel * 16)), 16),
VideoType.S8 => SignExtendTo32(context, context.ShiftRightU32(src, Const(sel * 8)), 8),
VideoType.S16 => SignExtendTo32(context, context.ShiftRightU32(src, Const(sel * 16)), 16),
VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
VectorSelect.S8B2 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
_ => src
};
}

View file

@ -1,48 +0,0 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Handles the vote instruction: writes the vote result to the predicate at
        /// bit 45 and, unless the destination is the zero register, the ballot to Rd.
        /// </summary>
        /// <param name="context">Emitter context holding the instruction being translated.</param>
        public static void Vote(EmitterContext context)
        {
            OpCodeVote op = (OpCodeVote)context.CurrOp;

            Operand pred = GetPredicate39(context);

            // Unknown vote operations produce no result and are only logged.
            Operand res = op.VoteOp switch
            {
                VoteOp.All => context.VoteAll(pred),
                VoteOp.Any => context.VoteAny(pred),
                VoteOp.AllEqual => context.VoteAllEqual(pred),
                _ => null
            };

            if (res == null)
            {
                context.Config.GpuAccessor.Log($"Invalid vote operation: {op.VoteOp}.");
            }
            else
            {
                context.Copy(Register(op.Predicate45), res);
            }

            if (op.Rd.IsRZ)
            {
                return;
            }

            context.Copy(Register(op.Rd), context.Ballot(pred));
        }
    }
}

View file

@ -0,0 +1,84 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Handles the Fswzadd instruction: writes the swizzled add of SrcA and SrcB
        /// to the destination, then updates the FP zero/negative flags via SetFPZnFlags.
        /// </summary>
        /// <param name="context">Emitter context holding the instruction being translated.</param>
        public static void Fswzadd(EmitterContext context)
        {
            InstFswzadd op = context.GetOp<InstFswzadd>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcReg(context, op.SrcB);
            Operand dest = GetDest(op.Dest);

            Operand sum = context.FPSwizzleAdd(srcA, srcB, op.PnWord);

            context.Copy(dest, sum);

            InstEmitAluHelper.SetFPZnFlags(context, dest, op.WriteCC);
        }

        /// <summary>
        /// Handles the Shfl instruction: performs the shuffle selected by the mode and
        /// writes the shuffled value to the destination register and the validity
        /// result to the destination predicate.
        /// </summary>
        /// <param name="context">Emitter context holding the instruction being translated.</param>
        public static void Shfl(EmitterContext context)
        {
            InstShfl op = context.GetOp<InstShfl>();

            Operand pred = Register(op.DestPred, RegisterType.Predicate);

            Operand srcA = GetSrcReg(context, op.SrcA);

            // B and C each come from an immediate or a register, depending on the "fix" bits.
            Operand srcB;
            if (op.BFixShfl)
            {
                srcB = Const(op.SrcBImm);
            }
            else
            {
                srcB = GetSrcReg(context, op.SrcB);
            }

            Operand srcC;
            if (op.CFixShfl)
            {
                srcC = Const(op.SrcCImm);
            }
            else
            {
                srcC = GetSrcReg(context, op.SrcC);
            }

            // Both stay null when the mode is not one of the four handled below.
            Operand res = null;
            Operand valid = null;

            switch (op.ShflMode)
            {
                case ShflMode.Idx:
                    (res, valid) = context.Shuffle(srcA, srcB, srcC);
                    break;
                case ShflMode.Up:
                    (res, valid) = context.ShuffleUp(srcA, srcB, srcC);
                    break;
                case ShflMode.Down:
                    (res, valid) = context.ShuffleDown(srcA, srcB, srcC);
                    break;
                case ShflMode.Bfly:
                    (res, valid) = context.ShuffleXor(srcA, srcB, srcC);
                    break;
            }

            context.Copy(GetDest(op.Dest), res);
            context.Copy(pred, valid);
        }

        /// <summary>
        /// Handles the Vote instruction: writes the vote result to the VpDest predicate
        /// and, unless the destination is the zero register, the ballot to Dest.
        /// </summary>
        /// <param name="context">Emitter context holding the instruction being translated.</param>
        public static void Vote(EmitterContext context)
        {
            InstVote op = context.GetOp<InstVote>();

            Operand pred = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            // Unknown vote modes produce no result and are only logged.
            Operand res = op.VoteMode switch
            {
                VoteMode.All => context.VoteAll(pred),
                VoteMode.Any => context.VoteAny(pred),
                VoteMode.Eq => context.VoteAllEqual(pred),
                _ => null
            };

            if (res == null)
            {
                context.Config.GpuAccessor.Log($"Invalid vote operation: {op.VoteMode}.");
            }
            else
            {
                context.Copy(Register(op.VpDest, RegisterType.Predicate), res);
            }

            if (op.Dest == RegisterConsts.RegisterZeroIndex)
            {
                return;
            }

            context.Copy(GetDest(op.Dest), context.Ballot(pred));
        }
    }
}

View file

@ -7,12 +7,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
static class Lop3Expression
{
public static Operand GetFromTruthTable(
EmitterContext context,
Operand srcA,
Operand srcB,
Operand srcC,
int imm)
public static Operand GetFromTruthTable(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int imm)
{
Operand expr = null;
@ -45,7 +40,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
int map;
// Encode into gray code.
map = ((imm >> 0) & 1) << 0;
map = ((imm >> 0) & 1) << 0;
map |= ((imm >> 1) & 1) << 4;
map |= ((imm >> 2) & 1) << 1;
map |= ((imm >> 3) & 1) << 5;