Initial support for double precision shader instructions. (#963)

* Implement DADD, DFMA and DMUL shader instructions
* Rename FP to FP32
* Correct double immediate
* Classic mistake
2020-03-03 11:02:08 -03:00 · 2020-03-03 11:02:08 -03:00 · dc97457bf0
commit dc97457bf0
parent 3045c1a186
19 changed files with 428 additions and 184 deletions
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs
@@ -85,12 +85,19 @@ namespace Ryujinx.Graphics.Shader.Instructions
            context.Copy(GetNF(), context.ICompareLess(dest, Const(0)));
        }

-        public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC)
+        public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC, Instruction fpType = Instruction.FP32)
        {
            if (setCC)
            {
-                context.Copy(GetZF(), context.FPCompareEqual(dest, ConstF(0)));
-                context.Copy(GetNF(), context.FPCompareLess (dest, ConstF(0)));
+                Operand zero = ConstF(0);
+
+                if (fpType == Instruction.FP64)
+                {
+                    zero = context.FP32ConvertToFP64(zero);
+                }
+
+                context.Copy(GetZF(), context.FPCompareEqual(dest, zero, fpType));
+                context.Copy(GetNF(), context.FPCompareLess (dest, zero, fpType));
            }
        }
    }
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs
@@ -21,31 +21,43 @@ namespace Ryujinx.Graphics.Shader.Instructions
            bool negateB   = op.RawOpCode.Extract(45);
            bool absoluteB = op.RawOpCode.Extract(49);

-            Operand srcB = context.FPAbsNeg(GetSrcB(context, srcType), absoluteB, negateB);
+            Operand srcB = context.FPAbsNeg(GetSrcB(context, srcType), absoluteB, negateB, srcType.ToInstFPType());

-            if (round)
+            if (round && srcType == dstType)
            {
                switch (op.RoundingMode)
                {
                    case RoundingMode.ToNearest:
-                        srcB = context.FPRound(srcB);
+                        srcB = context.FPRound(srcB, srcType.ToInstFPType());
                        break;

                    case RoundingMode.TowardsNegativeInfinity:
-                        srcB = context.FPFloor(srcB);
+                        srcB = context.FPFloor(srcB, srcType.ToInstFPType());
                        break;

                    case RoundingMode.TowardsPositiveInfinity:
-                        srcB = context.FPCeiling(srcB);
+                        srcB = context.FPCeiling(srcB, srcType.ToInstFPType());
                        break;

                    case RoundingMode.TowardsZero:
-                        srcB = context.FPTruncate(srcB);
+                        srcB = context.FPTruncate(srcB, srcType.ToInstFPType());
                        break;
                }
            }

-            srcB = context.FPSaturate(srcB, op.Saturate);
+            // We don't need to handle conversions between FP16 <-> FP32
+            // since we do FP16 operations as FP32 directly.
+            // FP16 <-> FP64 conversions are invalid.
+            if (srcType == FPType.FP32 && dstType == FPType.FP64)
+            {
+                srcB = context.FP32ConvertToFP64(srcB);
+            }
+            else if (srcType == FPType.FP64 && dstType == FPType.FP32)
+            {
+                srcB = context.FP64ConvertToFP32(srcB);
+            }
+
+            srcB = context.FPSaturate(srcB, op.Saturate, dstType.ToInstFPType());

            WriteFP(context, dstType, srcB);

@@ -229,9 +241,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
            {
                context.Copy(dest, context.PackHalf2x16(srcB, ConstF(0)));
            }
-            else
+            else /* if (type == FPType.FP64) */
            {
-                // TODO.
+                Operand dest2 = GetDest2(context);
+
+                context.Copy(dest, context.UnpackDouble2x32Low(srcB));
+                context.Copy(dest2, context.UnpackDouble2x32High(srcB));
            }
        }
    }
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs
@@ -11,53 +11,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
 {
    static partial class InstEmit
    {
-        public static void Fadd(EmitterContext context)
-        {
-            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
+        public static void Dadd(EmitterContext context) => EmitFPAdd(context, Instruction.FP64);
+        public static void Dfma(EmitterContext context) => EmitFPFma(context, Instruction.FP64);
+        public static void Dmul(EmitterContext context) => EmitFPMultiply(context, Instruction.FP64);

-            bool absoluteA = op.AbsoluteA, absoluteB, negateA, negateB;
-
-            if (op is OpCodeFArithImm32)
-            {
-                negateB   = op.RawOpCode.Extract(53);
-                negateA   = op.RawOpCode.Extract(56);
-                absoluteB = op.RawOpCode.Extract(57);
-            }
-            else
-            {
-                negateB   = op.RawOpCode.Extract(45);
-                negateA   = op.RawOpCode.Extract(48);
-                absoluteB = op.RawOpCode.Extract(49);
-            }
-
-            Operand srcA = context.FPAbsNeg(GetSrcA(context), absoluteA, negateA);
-            Operand srcB = context.FPAbsNeg(GetSrcB(context), absoluteB, negateB);
-
-            Operand dest = GetDest(context);
-
-            context.Copy(dest, context.FPSaturate(context.FPAdd(srcA, srcB), op.Saturate));
-
-            SetFPZnFlags(context, dest, op.SetCondCode);
-        }
-
-        public static void Ffma(EmitterContext context)
-        {
-            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
-
-            bool negateB = op.RawOpCode.Extract(48);
-            bool negateC = op.RawOpCode.Extract(49);
-
-            Operand srcA = GetSrcA(context);
-
-            Operand srcB = context.FPNegate(GetSrcB(context), negateB);
-            Operand srcC = context.FPNegate(GetSrcC(context), negateC);
-
-            Operand dest = GetDest(context);
-
-            context.Copy(dest, context.FPSaturate(context.FPFusedMultiplyAdd(srcA, srcB, srcC), op.Saturate));
-
-            SetFPZnFlags(context, dest, op.SetCondCode);
-        }
+        public static void Fadd(EmitterContext context) => EmitFPAdd(context, Instruction.FP32);
+        public static void Ffma(EmitterContext context) => EmitFPFma(context, Instruction.FP32);

        public static void Ffma32i(EmitterContext context)
        {
@@ -103,40 +62,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
            SetFPZnFlags(context, dest, op.SetCondCode);
        }

-        public static void Fmul(EmitterContext context)
-        {
-            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
-
-            bool isImm32 = op is OpCodeFArithImm32;
-
-            bool negateB = !isImm32 && op.RawOpCode.Extract(48);
-
-            Operand srcA = GetSrcA(context);
-
-            Operand srcB = context.FPNegate(GetSrcB(context), negateB);
-
-            switch (op.Scale)
-            {
-                case FPMultiplyScale.None: break;
-
-                case FPMultiplyScale.Divide2:   srcA = context.FPDivide  (srcA, ConstF(2)); break;
-                case FPMultiplyScale.Divide4:   srcA = context.FPDivide  (srcA, ConstF(4)); break;
-                case FPMultiplyScale.Divide8:   srcA = context.FPDivide  (srcA, ConstF(8)); break;
-                case FPMultiplyScale.Multiply2: srcA = context.FPMultiply(srcA, ConstF(2)); break;
-                case FPMultiplyScale.Multiply4: srcA = context.FPMultiply(srcA, ConstF(4)); break;
-                case FPMultiplyScale.Multiply8: srcA = context.FPMultiply(srcA, ConstF(8)); break;
-
-                default: break; //TODO: Warning.
-            }
-
-            Operand dest = GetDest(context);
-
-            bool saturate = isImm32 ? op.RawOpCode.Extract(55) : op.Saturate;
-
-            context.Copy(dest, context.FPSaturate(context.FPMultiply(srcA, srcB), saturate));
-
-            SetFPZnFlags(context, dest, op.SetCondCode);
-        }
+        public static void Fmul(EmitterContext context) => EmitFPMultiply(context, Instruction.FP32);

        public static void Fset(EmitterContext context)
        {
@@ -406,6 +332,107 @@ namespace Ryujinx.Graphics.Shader.Instructions
            context.Copy(GetDest(context), context.FPSaturate(res, op.Saturate));
        }

+        private static void EmitFPAdd(EmitterContext context, Instruction fpType)
+        {
+            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
+
+            bool isFP64 = fpType == Instruction.FP64;
+
+            bool absoluteA = op.AbsoluteA, absoluteB, negateA, negateB;
+
+            if (op is OpCodeFArithImm32)
+            {
+                negateB   = op.RawOpCode.Extract(53);
+                negateA   = op.RawOpCode.Extract(56);
+                absoluteB = op.RawOpCode.Extract(57);
+            }
+            else
+            {
+                negateB   = op.RawOpCode.Extract(45);
+                negateA   = op.RawOpCode.Extract(48);
+                absoluteB = op.RawOpCode.Extract(49);
+            }
+
+            Operand srcA = context.FPAbsNeg(GetSrcA(context, isFP64), absoluteA, negateA, fpType);
+            Operand srcB = context.FPAbsNeg(GetSrcB(context, isFP64), absoluteB, negateB, fpType);
+
+            Operand res = context.FPSaturate(context.FPAdd(srcA, srcB, fpType), op.Saturate, fpType);
+
+            SetDest(context, res, isFP64);
+
+            SetFPZnFlags(context, res, op.SetCondCode, fpType);
+        }
+
+        private static void EmitFPFma(EmitterContext context, Instruction fpType)
+        {
+            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
+
+            bool isFP64 = fpType == Instruction.FP64;
+
+            bool negateB = op.RawOpCode.Extract(48);
+            bool negateC = op.RawOpCode.Extract(49);
+
+            Operand srcA = GetSrcA(context, isFP64);
+
+            Operand srcB = context.FPNegate(GetSrcB(context, isFP64), negateB, fpType);
+            Operand srcC = context.FPNegate(GetSrcC(context, isFP64), negateC, fpType);
+
+            Operand res = context.FPSaturate(context.FPFusedMultiplyAdd(srcA, srcB, srcC, fpType), op.Saturate, fpType);
+
+            SetDest(context, res, isFP64);
+
+            SetFPZnFlags(context, res, op.SetCondCode, fpType);
+        }
+
+        private static void EmitFPMultiply(EmitterContext context, Instruction fpType)
+        {
+            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
+
+            bool isFP64 = fpType == Instruction.FP64;
+
+            bool isImm32 = op is OpCodeFArithImm32;
+
+            bool negateB = !isImm32 && op.RawOpCode.Extract(48);
+
+            Operand srcA = GetSrcA(context, isFP64);
+
+            Operand srcB = context.FPNegate(GetSrcB(context, isFP64), negateB, fpType);
+
+            if (op.Scale != FPMultiplyScale.None)
+            {
+                Operand scale = op.Scale switch
+                {
+                    FPMultiplyScale.Divide2 => ConstF(0.5f),
+                    FPMultiplyScale.Divide4 => ConstF(0.25f),
+                    FPMultiplyScale.Divide8 => ConstF(0.125f),
+                    FPMultiplyScale.Multiply2 => ConstF(2f),
+                    FPMultiplyScale.Multiply4 => ConstF(4f),
+                    FPMultiplyScale.Multiply8 => ConstF(8f),
+                    _ => ConstF(1) // Invalid, behave as if it had no scale.
+                };
+
+                if (scale.AsFloat() == 1)
+                {
+                    context.Config.PrintLog($"Invalid FP multiply scale \"{op.Scale}\".");
+                }
+
+                if (isFP64)
+                {
+                    scale = context.FP32ConvertToFP64(scale);
+                }
+
+                srcA = context.FPMultiply(srcA, scale, fpType);
+            }
+
+            bool saturate = isImm32 ? op.RawOpCode.Extract(55) : op.Saturate;
+
+            Operand res = context.FPSaturate(context.FPMultiply(srcA, srcB, fpType), saturate, fpType);
+
+            SetDest(context, res, isFP64);
+
+            SetFPZnFlags(context, res, op.SetCondCode, fpType);
+        }
+
        private static Operand GetFPComparison(
            EmitterContext context,
            Condition      cond,
@@ -447,7 +474,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
                    default: throw new InvalidOperationException($"Unexpected condition \"{cond}\".");
                }

-                res = context.Add(inst | Instruction.FP, Local(), srcA, srcB);
+                res = context.Add(inst | Instruction.FP32, Local(), srcA, srcB);

                if ((cond & Condition.Nan) != 0)
                {
@@ -483,5 +510,20 @@ namespace Ryujinx.Graphics.Shader.Instructions

            return FPAbsNeg(context, operands, false, op.NegateC);
        }
+
+        private static void SetDest(EmitterContext context, Operand value, bool isFP64)
+        {
+            if (isFP64)
+            {
+                IOpCodeRd op = (IOpCodeRd)context.CurrOp;
+
+                context.Copy(Register(op.Rd.Index, op.Rd.Type), context.UnpackDouble2x32Low(value));
+                context.Copy(Register(op.Rd.Index | 1, op.Rd.Type), context.UnpackDouble2x32High(value));
+            }
+            else
+            {
+                context.Copy(GetDest(context), value);
+            }
+        }
    }
 }
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs
@@ -34,9 +34,25 @@ namespace Ryujinx.Graphics.Shader.Instructions
            return Register(((IOpCodeRd)context.CurrOp).Rd);
        }

-        public static Operand GetSrcA(EmitterContext context)
+        public static Operand GetDest2(EmitterContext context)
        {
-            return Register(((IOpCodeRa)context.CurrOp).Ra);
+            Register rd = ((IOpCodeRd)context.CurrOp).Rd;
+
+            return Register(rd.Index | 1, rd.Type);
+        }
+
+        public static Operand GetSrcA(EmitterContext context, bool isFP64 = false)
+        {
+            IOpCodeRa op = (IOpCodeRa)context.CurrOp;
+
+            if (isFP64)
+            {
+                return context.PackDouble2x32(Register(op.Ra.Index, op.Ra.Type), Register(op.Ra.Index | 1, op.Ra.Type));
+            }
+            else
+            {
+                return Register(op.Ra);
+            }
        }

        public static Operand GetSrcB(EmitterContext context, FPType floatType)
@@ -53,46 +69,78 @@ namespace Ryujinx.Graphics.Shader.Instructions
            }
            else if (floatType == FPType.FP64)
            {
-                // TODO: Double floating-point type support.
+                return GetSrcB(context, true);
            }

-            context.Config.PrintLog($"Invalid floating point type: {floatType}.");
-
-            return ConstF(0);
+            throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
        }

-        public static Operand GetSrcB(EmitterContext context)
+        public static Operand GetSrcB(EmitterContext context, bool isFP64 = false)
        {
-            switch (context.CurrOp)
+            if (isFP64)
            {
-                case IOpCodeCbuf op:
-                    return Cbuf(op.Slot, op.Offset);
+                switch (context.CurrOp)
+                {
+                    case IOpCodeCbuf op:
+                        return context.PackDouble2x32(Cbuf(op.Slot, op.Offset), Cbuf(op.Slot, op.Offset + 1));

-                case IOpCodeImm op:
-                    return Const(op.Immediate);
+                    case IOpCodeImmF op:
+                        return context.FP32ConvertToFP64(ConstF(op.Immediate));

-                case IOpCodeImmF op:
-                    return ConstF(op.Immediate);
+                    case IOpCodeReg op:
+                        return context.PackDouble2x32(Register(op.Rb.Index, op.Rb.Type), Register(op.Rb.Index | 1, op.Rb.Type));

-                case IOpCodeReg op:
-                    return Register(op.Rb);
+                    case IOpCodeRegCbuf op:
+                        return context.PackDouble2x32(Register(op.Rc.Index, op.Rc.Type), Register(op.Rc.Index | 1, op.Rc.Type));
+                }
+            }
+            else
+            {
+                switch (context.CurrOp)
+                {
+                    case IOpCodeCbuf op:
+                        return Cbuf(op.Slot, op.Offset);

-                case IOpCodeRegCbuf op:
-                    return Register(op.Rc);
+                    case IOpCodeImm op:
+                        return Const(op.Immediate);
+
+                    case IOpCodeImmF op:
+                        return ConstF(op.Immediate);
+
+                    case IOpCodeReg op:
+                        return Register(op.Rb);
+
+                    case IOpCodeRegCbuf op:
+                        return Register(op.Rc);
+                }
            }

            throw new InvalidOperationException($"Unexpected opcode type \"{context.CurrOp.GetType().Name}\".");
        }

-        public static Operand GetSrcC(EmitterContext context)
+        public static Operand GetSrcC(EmitterContext context, bool isFP64 = false)
        {
-            switch (context.CurrOp)
+            if (isFP64)
            {
-                case IOpCodeRegCbuf op:
-                    return Cbuf(op.Slot, op.Offset);
+                switch (context.CurrOp)
+                {
+                    case IOpCodeRegCbuf op:
+                        return context.PackDouble2x32(Cbuf(op.Slot, op.Offset), Cbuf(op.Slot, op.Offset + 1));

-                case IOpCodeRc op:
-                    return Register(op.Rc);
+                    case IOpCodeRc op:
+                        return context.PackDouble2x32(Register(op.Rc.Index, op.Rc.Type), Register(op.Rc.Index | 1, op.Rc.Type));
+                }
+            }
+            else
+            {
+                switch (context.CurrOp)
+                {
+                    case IOpCodeRegCbuf op:
+                        return Cbuf(op.Slot, op.Offset);
+
+                    case IOpCodeRc op:
+                        return Register(op.Rc);
+                }
            }

            throw new InvalidOperationException($"Unexpected opcode type \"{context.CurrOp.GetType().Name}\".");