From dc97457bf0121b9383054ca14d3c525b56b92634 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Tue, 3 Mar 2020 11:02:08 -0300
Subject: [PATCH] Initial support for double precision shader instructions.
 (#963)

* Implement DADD, DFMA and DMUL shader instructions

* Rename FP to FP32

* Correct double immediate

* Classic mistake
---
 .../CodeGen/Glsl/Declarations.cs              |   1 +
 .../CodeGen/Glsl/Instructions/InstGen.cs      |   6 +
 .../Glsl/Instructions/InstGenHelper.cs        |   4 +
 .../Glsl/Instructions/InstGenPacking.cs       |  20 ++
 .../Decoders/DecoderHelper.cs                 |  16 ++
 Ryujinx.Graphics.Shader/Decoders/FPType.cs    |  10 +
 .../Decoders/OpCodeDArithImm.cs               |  14 ++
 .../Decoders/OpCodeTable.cs                   |  10 +
 .../Instructions/InstEmitAluHelper.cs         |  13 +-
 .../Instructions/InstEmitConversion.cs        |  33 ++-
 .../Instructions/InstEmitFArith.cs            | 204 +++++++++++-------
 .../Instructions/InstEmitHelper.cs            |  96 ++++++---
 .../IntermediateRepresentation/Instruction.cs |   7 +-
 .../StructuredIr/InstructionInfo.cs           |  29 ++-
 .../StructuredIr/StructuredProgram.cs         |   2 +-
 .../StructuredIr/VariableType.cs              |   1 +
 .../Translation/EmitterContextInsts.cs        | 116 ++++++----
 .../Translation/Lowering.cs                   |   6 +-
 .../Optimizations/ConstantFolding.cs          |  24 +--
 19 files changed, 428 insertions(+), 184 deletions(-)
 create mode 100644 Ryujinx.Graphics.Shader/Decoders/OpCodeDArithImm.cs

diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
index 2e7f9f1b07..5fcc1b277e 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
@@ -189,6 +189,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
             {
                 case VariableType.Bool: return "bool";
                 case VariableType.F32:  return "precise float";
+                case VariableType.F64:  return "double";
                 case VariableType.S32:  return "int";
                 case VariableType.U32:  return "uint";
             }
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
index 73a71f9ee4..fe98277047 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
@@ -136,6 +136,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
                     case Instruction.Lod:
                         return InstGenMemory.Lod(context, operation);
 
+                    case Instruction.PackDouble2x32:
+                        return InstGenPacking.PackDouble2x32(context, operation);
+
                     case Instruction.PackHalf2x16:
                         return InstGenPacking.PackHalf2x16(context, operation);
 
@@ -154,6 +157,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
                     case Instruction.TextureSize:
                         return InstGenMemory.TextureSize(context, operation);
 
+                    case Instruction.UnpackDouble2x32:
+                        return InstGenPacking.UnpackDouble2x32(context, operation);
+
                     case Instruction.UnpackHalf2x16:
                         return InstGenPacking.UnpackHalf2x16(context, operation);
                 }
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
index 8dec34997a..15f9b6665d 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
@@ -50,6 +50,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             Add(Instruction.CompareLessU32,           InstType.OpBinary,       "<",               4);
             Add(Instruction.CompareNotEqual,          InstType.OpBinaryCom,    "!=",              5);
             Add(Instruction.ConditionalSelect,        InstType.OpTernary,      "?:",              12);
+            Add(Instruction.ConvertFP32ToFP64,        InstType.CallUnary,      "double");
+            Add(Instruction.ConvertFP64ToFP32,        InstType.CallUnary,      "float");
             Add(Instruction.ConvertFPToS32,           InstType.CallUnary,      "int");
             Add(Instruction.ConvertFPToU32,           InstType.CallUnary,      "uint");
             Add(Instruction.ConvertS32ToFP,           InstType.CallUnary,      "float");
@@ -83,6 +85,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             Add(Instruction.LogicalOr,                InstType.OpBinaryCom,    "||",              11);
             Add(Instruction.LoopBreak,                InstType.OpNullary,      "break");
             Add(Instruction.LoopContinue,             InstType.OpNullary,      "continue");
+            Add(Instruction.PackDouble2x32,           InstType.Special);
             Add(Instruction.PackHalf2x16,             InstType.Special);
             Add(Instruction.ShiftLeft,                InstType.OpBinary,       "<<",              3);
             Add(Instruction.ShiftRightS32,            InstType.OpBinary,       ">>",              3);
@@ -113,6 +116,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             Add(Instruction.TextureSample,            InstType.Special);
             Add(Instruction.TextureSize,              InstType.Special);
             Add(Instruction.Truncate,                 InstType.CallUnary,      "trunc");
+            Add(Instruction.UnpackDouble2x32,         InstType.Special);
             Add(Instruction.UnpackHalf2x16,           InstType.Special);
             Add(Instruction.VoteAll,                  InstType.CallUnary,      "allInvocationsARB");
             Add(Instruction.VoteAllEqual,             InstType.CallUnary,      "allInvocationsEqualARB");
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenPacking.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenPacking.cs
index e5167f93f8..ecb90c1e08 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenPacking.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenPacking.cs
@@ -7,6 +7,17 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
 {
     static class InstGenPacking
     {
+        // Builds the GLSL expression for Instruction.PackDouble2x32: both operation sources
+        // are rendered with the variable type reported by GetSrcVarType and passed, in source
+        // order, as the two components of a uvec2 to packDouble2x32.
+        public static string PackDouble2x32(CodeGenContext context, AstOperation operation)
+        {
+            string xExpr = GetSoureExpr(context, operation.GetSource(0), GetSrcVarType(operation.Inst, 0));
+            string yExpr = GetSoureExpr(context, operation.GetSource(1), GetSrcVarType(operation.Inst, 1));
+
+            return $"packDouble2x32(uvec2({xExpr}, {yExpr}))";
+        }
+
         public static string PackHalf2x16(CodeGenContext context, AstOperation operation)
         {
             IAstNode src0 = operation.GetSource(0);
@@ -18,6 +29,15 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             return $"packHalf2x16(vec2({src0Expr}, {src1Expr}))";
         }
 
+        // Builds "unpackDouble2x32(<source>)" followed by the result of GetMask(operation.Index).
+        public static string UnpackDouble2x32(CodeGenContext context, AstOperation operation)
+        {
+            string packedExpr = GetSoureExpr(context, operation.GetSource(0), GetSrcVarType(operation.Inst, 0));
+            string mask       = GetMask(operation.Index);
+
+            return $"unpackDouble2x32({packedExpr}){mask}";
+        }
+
         public static string UnpackHalf2x16(CodeGenContext context, AstOperation operation)
         {
             IAstNode src = operation.GetSource(0);
diff --git a/Ryujinx.Graphics.Shader/Decoders/DecoderHelper.cs b/Ryujinx.Graphics.Shader/Decoders/DecoderHelper.cs
index 77cd1bf728..3585c35fa4 100644
--- a/Ryujinx.Graphics.Shader/Decoders/DecoderHelper.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/DecoderHelper.cs
@@ -54,5 +54,21 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
             return BitConverter.Int32BitsToSingle(imm);
         }
+
+        // Rebuilds a double bit pattern from the opcode: the field read by Extract(20, 19) is
+        // shifted up by 44 bits and bit 63 (the sign) is set when Extract(56) is true.
+        // NOTE(review): the value is narrowed to float on return, so the double range is not kept.
+        public static float DecodeD20Immediate(long opCode)
+        {
+            long rawBits = (long)opCode.Extract(20, 19) << 44;
+
+            if (opCode.Extract(56))
+            {
+                rawBits |= long.MinValue;
+            }
+
+            double value = BitConverter.Int64BitsToDouble(rawBits);
+            return (float)value;
+        }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/FPType.cs b/Ryujinx.Graphics.Shader/Decoders/FPType.cs
index e602ad45fa..b5af2c1d24 100644
--- a/Ryujinx.Graphics.Shader/Decoders/FPType.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/FPType.cs
@@ -1,3 +1,5 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+
 namespace Ryujinx.Graphics.Shader.Decoders
 {
     enum FPType
@@ -6,4 +8,12 @@ namespace Ryujinx.Graphics.Shader.Decoders
         FP32 = 2,
         FP64 = 3
     }
+
+    // Helpers that translate a decoder FPType into the matching IR precision flag.
+    static class FPTypeExtensions
+    {
+        // FP64 maps to Instruction.FP64; every other value maps to Instruction.FP32.
+        public static Instruction ToInstFPType(this FPType type) =>
+            type != FPType.FP64 ? Instruction.FP32 : Instruction.FP64;
+    }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeDArithImm.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeDArithImm.cs
new file mode 100644
index 0000000000..99d4cdfdd1
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeDArithImm.cs
@@ -0,0 +1,14 @@
+using Ryujinx.Graphics.Shader.Instructions;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    // Opcode with an immediate decoded by DecodeD20Immediate, exposed through IOpCodeImmF.
+    class OpCodeDArithImm : OpCodeFArith, IOpCodeImmF
+    {
+        // Value produced by DecoderHelper.DecodeD20Immediate for this opcode.
+        public float Immediate { get; }
+
+        public OpCodeDArithImm(InstEmitter emitter, ulong address, long opCode)
+            : base(emitter, address, opCode) => Immediate = DecoderHelper.DecodeD20Immediate(opCode);
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
index 87f1de0c48..72f66f4aa3 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
@@ -45,6 +45,16 @@ namespace Ryujinx.Graphics.Shader.Decoders
             Set("111000110100xx", InstEmit.Brk,     typeof(OpCodeBranchPop));
             Set("111000100101xx", InstEmit.Brx,     typeof(OpCodeBranchIndir));
             Set("0101000010100x", InstEmit.Csetp,   typeof(OpCodePset));
+            Set("0100110001110x", InstEmit.Dadd,    typeof(OpCodeFArithCbuf));
+            Set("0011100x01110x", InstEmit.Dadd,    typeof(OpCodeDArithImm));
+            Set("0101110001110x", InstEmit.Dadd,    typeof(OpCodeFArithReg));
+            Set("010010110111xx", InstEmit.Dfma,    typeof(OpCodeFArithCbuf));
+            Set("0011011x0111xx", InstEmit.Dfma,    typeof(OpCodeDArithImm));
+            Set("010100110111xx", InstEmit.Dfma,    typeof(OpCodeFArithRegCbuf));
+            Set("010110110111xx", InstEmit.Dfma,    typeof(OpCodeFArithReg));
+            Set("0100110010000x", InstEmit.Dmul,    typeof(OpCodeFArithCbuf));
+            Set("0011100x10000x", InstEmit.Dmul,    typeof(OpCodeDArithImm));
+            Set("0101110010000x", InstEmit.Dmul,    typeof(OpCodeFArithReg));
             Set("111000110000xx", InstEmit.Exit,    typeof(OpCodeExit));
             Set("0100110010101x", InstEmit.F2F,     typeof(OpCodeFArithCbuf));
             Set("0011100x10101x", InstEmit.F2F,     typeof(OpCodeFArithImm));
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs
index 572068dad3..588ec2163b 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs
@@ -85,12 +85,19 @@ namespace Ryujinx.Graphics.Shader.Instructions
             context.Copy(GetNF(), context.ICompareLess(dest, Const(0)));
         }
 
-        public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC)
+        public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC, Instruction fpType = Instruction.FP32)
         {
             if (setCC)
             {
-                context.Copy(GetZF(), context.FPCompareEqual(dest, ConstF(0)));
-                context.Copy(GetNF(), context.FPCompareLess (dest, ConstF(0)));
+                // dest is compared against a zero constant; for FP64 the FP32 zero is
+                // widened first so both comparison operands are passed as doubles.
+                Operand zero = fpType == Instruction.FP64
+                    ? context.FP32ConvertToFP64(ConstF(0))
+                    : ConstF(0);
+
+                // ZF receives "dest == zero", NF receives "dest < zero".
+                context.Copy(GetZF(), context.FPCompareEqual(dest, zero, fpType));
+                context.Copy(GetNF(), context.FPCompareLess (dest, zero, fpType));
             }
         }
     }
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs
index afec77616b..8716d4e734 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs
@@ -21,31 +21,43 @@ namespace Ryujinx.Graphics.Shader.Instructions
             bool negateB   = op.RawOpCode.Extract(45);
             bool absoluteB = op.RawOpCode.Extract(49);
 
-            Operand srcB = context.FPAbsNeg(GetSrcB(context, srcType), absoluteB, negateB);
+            Operand srcB = context.FPAbsNeg(GetSrcB(context, srcType), absoluteB, negateB, srcType.ToInstFPType());
 
-            if (round)
+            if (round && srcType == dstType)
             {
                 switch (op.RoundingMode)
                 {
                     case RoundingMode.ToNearest:
-                        srcB = context.FPRound(srcB);
+                        srcB = context.FPRound(srcB, srcType.ToInstFPType());
                         break;
 
                     case RoundingMode.TowardsNegativeInfinity:
-                        srcB = context.FPFloor(srcB);
+                        srcB = context.FPFloor(srcB, srcType.ToInstFPType());
                         break;
 
                     case RoundingMode.TowardsPositiveInfinity:
-                        srcB = context.FPCeiling(srcB);
+                        srcB = context.FPCeiling(srcB, srcType.ToInstFPType());
                         break;
 
                     case RoundingMode.TowardsZero:
-                        srcB = context.FPTruncate(srcB);
+                        srcB = context.FPTruncate(srcB, srcType.ToInstFPType());
                         break;
                 }
             }
 
-            srcB = context.FPSaturate(srcB, op.Saturate);
+            // We don't need to handle conversions between FP16 <-> FP32
+            // since we do FP16 operations as FP32 directly.
+            // FP16 <-> FP64 conversions are invalid.
+            if (srcType == FPType.FP32 && dstType == FPType.FP64)
+            {
+                srcB = context.FP32ConvertToFP64(srcB);
+            }
+            else if (srcType == FPType.FP64 && dstType == FPType.FP32)
+            {
+                srcB = context.FP64ConvertToFP32(srcB);
+            }
+
+            srcB = context.FPSaturate(srcB, op.Saturate, dstType.ToInstFPType());
 
             WriteFP(context, dstType, srcB);
 
@@ -229,9 +241,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
             {
                 context.Copy(dest, context.PackHalf2x16(srcB, ConstF(0)));
             }
-            else
+            else /* if (type == FPType.FP64) */
             {
-                // TODO.
+                Operand dest2 = GetDest2(context);
+
+                context.Copy(dest, context.UnpackDouble2x32Low(srcB));
+                context.Copy(dest2, context.UnpackDouble2x32High(srcB));
             }
         }
     }
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs
index 23f40d4692..fa5c684c13 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs
@@ -11,53 +11,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
 {
     static partial class InstEmit
     {
-        public static void Fadd(EmitterContext context)
-        {
-            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
+        public static void Dadd(EmitterContext context) => EmitFPAdd(context, Instruction.FP64); // Same emitter as Fadd, with the FP64 flag.
+        public static void Dfma(EmitterContext context) => EmitFPFma(context, Instruction.FP64); // Same emitter as Ffma, with the FP64 flag.
+        public static void Dmul(EmitterContext context) => EmitFPMultiply(context, Instruction.FP64); // Same emitter as Fmul, with the FP64 flag.
 
-            bool absoluteA = op.AbsoluteA, absoluteB, negateA, negateB;
-
-            if (op is OpCodeFArithImm32)
-            {
-                negateB   = op.RawOpCode.Extract(53);
-                negateA   = op.RawOpCode.Extract(56);
-                absoluteB = op.RawOpCode.Extract(57);
-            }
-            else
-            {
-                negateB   = op.RawOpCode.Extract(45);
-                negateA   = op.RawOpCode.Extract(48);
-                absoluteB = op.RawOpCode.Extract(49);
-            }
-
-            Operand srcA = context.FPAbsNeg(GetSrcA(context), absoluteA, negateA);
-            Operand srcB = context.FPAbsNeg(GetSrcB(context), absoluteB, negateB);
-
-            Operand dest = GetDest(context);
-
-            context.Copy(dest, context.FPSaturate(context.FPAdd(srcA, srcB), op.Saturate));
-
-            SetFPZnFlags(context, dest, op.SetCondCode);
-        }
-
-        public static void Ffma(EmitterContext context)
-        {
-            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
-
-            bool negateB = op.RawOpCode.Extract(48);
-            bool negateC = op.RawOpCode.Extract(49);
-
-            Operand srcA = GetSrcA(context);
-
-            Operand srcB = context.FPNegate(GetSrcB(context), negateB);
-            Operand srcC = context.FPNegate(GetSrcC(context), negateC);
-
-            Operand dest = GetDest(context);
-
-            context.Copy(dest, context.FPSaturate(context.FPFusedMultiplyAdd(srcA, srcB, srcC), op.Saturate));
-
-            SetFPZnFlags(context, dest, op.SetCondCode);
-        }
+        public static void Fadd(EmitterContext context) => EmitFPAdd(context, Instruction.FP32);
+        public static void Ffma(EmitterContext context) => EmitFPFma(context, Instruction.FP32);
 
         public static void Ffma32i(EmitterContext context)
         {
@@ -103,40 +62,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
             SetFPZnFlags(context, dest, op.SetCondCode);
         }
 
-        public static void Fmul(EmitterContext context)
-        {
-            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
-
-            bool isImm32 = op is OpCodeFArithImm32;
-
-            bool negateB = !isImm32 && op.RawOpCode.Extract(48);
-
-            Operand srcA = GetSrcA(context);
-
-            Operand srcB = context.FPNegate(GetSrcB(context), negateB);
-
-            switch (op.Scale)
-            {
-                case FPMultiplyScale.None: break;
-
-                case FPMultiplyScale.Divide2:   srcA = context.FPDivide  (srcA, ConstF(2)); break;
-                case FPMultiplyScale.Divide4:   srcA = context.FPDivide  (srcA, ConstF(4)); break;
-                case FPMultiplyScale.Divide8:   srcA = context.FPDivide  (srcA, ConstF(8)); break;
-                case FPMultiplyScale.Multiply2: srcA = context.FPMultiply(srcA, ConstF(2)); break;
-                case FPMultiplyScale.Multiply4: srcA = context.FPMultiply(srcA, ConstF(4)); break;
-                case FPMultiplyScale.Multiply8: srcA = context.FPMultiply(srcA, ConstF(8)); break;
-
-                default: break; //TODO: Warning.
-            }
-
-            Operand dest = GetDest(context);
-
-            bool saturate = isImm32 ? op.RawOpCode.Extract(55) : op.Saturate;
-
-            context.Copy(dest, context.FPSaturate(context.FPMultiply(srcA, srcB), saturate));
-
-            SetFPZnFlags(context, dest, op.SetCondCode);
-        }
+        public static void Fmul(EmitterContext context) => EmitFPMultiply(context, Instruction.FP32);
 
         public static void Fset(EmitterContext context)
         {
@@ -406,6 +332,107 @@ namespace Ryujinx.Graphics.Shader.Instructions
             context.Copy(GetDest(context), context.FPSaturate(res, op.Saturate));
         }
 
+        // Shared emitter behind Fadd (FP32) and Dadd (FP64).
+        // Applies the abs/neg modifiers to sources A and B, adds them, optionally saturates,
+        // then writes the destination and, when op.SetCondCode is set, the Z/N flags.
+        private static void EmitFPAdd(EmitterContext context, Instruction fpType)
+        {
+            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
+
+            bool isFP64 = fpType == Instruction.FP64;
+
+            bool isImm32 = op is OpCodeFArithImm32;
+
+            // OpCodeFArithImm32 keeps its negate/abs modifier bits at different positions.
+            bool negateB   = op.RawOpCode.Extract(isImm32 ? 53 : 45);
+            bool negateA   = op.RawOpCode.Extract(isImm32 ? 56 : 48);
+            bool absoluteB = op.RawOpCode.Extract(isImm32 ? 57 : 49);
+
+            Operand rawA = GetSrcA(context, isFP64);
+            Operand srcA = context.FPAbsNeg(rawA, op.AbsoluteA, negateA, fpType);
+
+            Operand rawB = GetSrcB(context, isFP64);
+            Operand srcB = context.FPAbsNeg(rawB, absoluteB, negateB, fpType);
+
+            Operand sum = context.FPAdd(srcA, srcB, fpType);
+            Operand res = context.FPSaturate(sum, op.Saturate, fpType);
+
+            // For FP64, SetDest splits the result across the Rd / Rd | 1 register pair.
+            SetDest(context, res, isFP64);
+
+            SetFPZnFlags(context, res, op.SetCondCode, fpType);
+        }
+
+        // Shared emitter behind Ffma (FP32) and Dfma (FP64): fused multiply-add of sources
+        // A, B and C, where B and C are negated when opcode bits 48 and 49 are set.
+        private static void EmitFPFma(EmitterContext context, Instruction fpType)
+        {
+            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
+
+            bool isFP64 = fpType == Instruction.FP64;
+
+            Operand srcA = GetSrcA(context, isFP64);
+            Operand srcB = context.FPNegate(GetSrcB(context, isFP64), op.RawOpCode.Extract(48), fpType);
+            Operand srcC = context.FPNegate(GetSrcC(context, isFP64), op.RawOpCode.Extract(49), fpType);
+
+            Operand fma = context.FPFusedMultiplyAdd(srcA, srcB, srcC, fpType);
+            Operand res = context.FPSaturate(fma, op.Saturate, fpType);
+
+            // For FP64, SetDest splits the result across the Rd / Rd | 1 register pair.
+            SetDest(context, res, isFP64);
+
+            SetFPZnFlags(context, res, op.SetCondCode, fpType);
+        }
+
+        // Shared emitter behind Fmul (FP32) and Dmul (FP64).
+        // Multiplies source A, pre-multiplied by the opcode scale factor when one is set, by
+        // source B, optionally saturates, then writes the destination and the Z/N flags.
+        private static void EmitFPMultiply(EmitterContext context, Instruction fpType)
+        {
+            IOpCodeFArith op = (IOpCodeFArith)context.CurrOp;
+
+            bool isFP64  = fpType == Instruction.FP64;
+            bool isImm32 = op is OpCodeFArithImm32;
+
+            Operand srcA = GetSrcA(context, isFP64);
+
+            Operand rawB = GetSrcB(context, isFP64);
+            Operand srcB = context.FPNegate(rawB, !isImm32 && op.RawOpCode.Extract(48), fpType);
+
+            if (op.Scale != FPMultiplyScale.None)
+            {
+                float factor = op.Scale switch
+                {
+                    FPMultiplyScale.Divide2 => 0.5f,
+                    FPMultiplyScale.Divide4 => 0.25f,
+                    FPMultiplyScale.Divide8 => 0.125f,
+                    FPMultiplyScale.Multiply2 => 2f,
+                    FPMultiplyScale.Multiply4 => 4f,
+                    FPMultiplyScale.Multiply8 => 8f,
+                    _ => 1f // Invalid, behave as if it had no scale.
+                };
+
+                if (factor == 1f)
+                {
+                    context.Config.PrintLog($"Invalid FP multiply scale \"{op.Scale}\".");
+                }
+
+                Operand scale = isFP64 ? context.FP32ConvertToFP64(ConstF(factor)) : ConstF(factor);
+
+                srcA = context.FPMultiply(srcA, scale, fpType);
+            }
+
+            // OpCodeFArithImm32 reads its saturate flag from opcode bit 55 instead of op.Saturate.
+            bool saturate = isImm32 ? op.RawOpCode.Extract(55) : op.Saturate;
+
+            Operand product = context.FPMultiply(srcA, srcB, fpType);
+            Operand res     = context.FPSaturate(product, saturate, fpType);
+
+            SetDest(context, res, isFP64);
+
+            SetFPZnFlags(context, res, op.SetCondCode, fpType);
+        }
+
         private static Operand GetFPComparison(
             EmitterContext context,
             Condition      cond,
@@ -447,7 +474,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
                     default: throw new InvalidOperationException($"Unexpected condition \"{cond}\".");
                 }
 
-                res = context.Add(inst | Instruction.FP, Local(), srcA, srcB);
+                res = context.Add(inst | Instruction.FP32, Local(), srcA, srcB);
 
                 if ((cond & Condition.Nan) != 0)
                 {
@@ -483,5 +510,20 @@ namespace Ryujinx.Graphics.Shader.Instructions
 
             return FPAbsNeg(context, operands, false, op.NegateC);
         }
+
+        // Writes value to the destination: a single copy for FP32, or for FP64 the results of
+        // UnpackDouble2x32Low/High copied into registers Rd.Index and Rd.Index | 1.
+        private static void SetDest(EmitterContext context, Operand value, bool isFP64)
+        {
+            if (!isFP64)
+            {
+                context.Copy(GetDest(context), value);
+                return;
+            }
+
+            IOpCodeRd op = (IOpCodeRd)context.CurrOp;
+            context.Copy(Register(op.Rd.Index, op.Rd.Type), context.UnpackDouble2x32Low(value));
+            context.Copy(Register(op.Rd.Index | 1, op.Rd.Type), context.UnpackDouble2x32High(value));
+        }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs
index 5123a6e2af..09d90e157c 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs
@@ -34,9 +34,25 @@ namespace Ryujinx.Graphics.Shader.Instructions
             return Register(((IOpCodeRd)context.CurrOp).Rd);
         }
 
-        public static Operand GetSrcA(EmitterContext context)
+        public static Operand GetDest2(EmitterContext context)
         {
-            return Register(((IOpCodeRa)context.CurrOp).Ra);
+            Register rd = ((IOpCodeRd)context.CurrOp).Rd;
+
+            return Register(rd.Index | 1, rd.Type);
+        }
+
+        // Returns source A: register Ra on its own, or for FP64 the register pair
+        // (Ra.Index, Ra.Index | 1) combined through PackDouble2x32.
+        public static Operand GetSrcA(EmitterContext context, bool isFP64 = false)
+        {
+            Register ra = ((IOpCodeRa)context.CurrOp).Ra;
+
+            if (!isFP64)
+            {
+                return Register(ra);
+            }
+
+            return context.PackDouble2x32(Register(ra.Index, ra.Type), Register(ra.Index | 1, ra.Type));
+        }
 
         public static Operand GetSrcB(EmitterContext context, FPType floatType)
@@ -53,46 +69,78 @@ namespace Ryujinx.Graphics.Shader.Instructions
             }
             else if (floatType == FPType.FP64)
             {
-                // TODO: Double floating-point type support.
+                return GetSrcB(context, true);
             }
 
-            context.Config.PrintLog($"Invalid floating point type: {floatType}.");
-
-            return ConstF(0);
+            throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
         }
 
-        public static Operand GetSrcB(EmitterContext context)
+        public static Operand GetSrcB(EmitterContext context, bool isFP64 = false)
         {
-            switch (context.CurrOp)
+            if (isFP64)
             {
-                case IOpCodeCbuf op:
-                    return Cbuf(op.Slot, op.Offset);
+                switch (context.CurrOp)
+                {
+                    case IOpCodeCbuf op:
+                        return context.PackDouble2x32(Cbuf(op.Slot, op.Offset), Cbuf(op.Slot, op.Offset + 1));
 
-                case IOpCodeImm op:
-                    return Const(op.Immediate);
+                    case IOpCodeImmF op:
+                        return context.FP32ConvertToFP64(ConstF(op.Immediate));
 
-                case IOpCodeImmF op:
-                    return ConstF(op.Immediate);
+                    case IOpCodeReg op:
+                        return context.PackDouble2x32(Register(op.Rb.Index, op.Rb.Type), Register(op.Rb.Index | 1, op.Rb.Type));
 
-                case IOpCodeReg op:
-                    return Register(op.Rb);
+                    case IOpCodeRegCbuf op:
+                        return context.PackDouble2x32(Register(op.Rc.Index, op.Rc.Type), Register(op.Rc.Index | 1, op.Rc.Type));
+                }
+            }
+            else
+            {
+                switch (context.CurrOp)
+                {
+                    case IOpCodeCbuf op:
+                        return Cbuf(op.Slot, op.Offset);
 
-                case IOpCodeRegCbuf op:
-                    return Register(op.Rc);
+                    case IOpCodeImm op:
+                        return Const(op.Immediate);
+
+                    case IOpCodeImmF op:
+                        return ConstF(op.Immediate);
+
+                    case IOpCodeReg op:
+                        return Register(op.Rb);
+
+                    case IOpCodeRegCbuf op:
+                        return Register(op.Rc);
+                }
             }
 
             throw new InvalidOperationException($"Unexpected opcode type \"{context.CurrOp.GetType().Name}\".");
         }
 
-        public static Operand GetSrcC(EmitterContext context)
+        public static Operand GetSrcC(EmitterContext context, bool isFP64 = false)
         {
-            switch (context.CurrOp)
+            if (isFP64)
             {
-                case IOpCodeRegCbuf op:
-                    return Cbuf(op.Slot, op.Offset);
+                switch (context.CurrOp)
+                {
+                    case IOpCodeRegCbuf op:
+                        return context.PackDouble2x32(Cbuf(op.Slot, op.Offset), Cbuf(op.Slot, op.Offset + 1));
 
-                case IOpCodeRc op:
-                    return Register(op.Rc);
+                    case IOpCodeRc op:
+                        return context.PackDouble2x32(Register(op.Rc.Index, op.Rc.Type), Register(op.Rc.Index | 1, op.Rc.Type));
+                }
+            }
+            else
+            {
+                switch (context.CurrOp)
+                {
+                    case IOpCodeRegCbuf op:
+                        return Cbuf(op.Slot, op.Offset);
+
+                    case IOpCodeRc op:
+                        return Register(op.Rc);
+                }
             }
 
             throw new InvalidOperationException($"Unexpected opcode type \"{context.CurrOp.GetType().Name}\".");
diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
index 7108112c97..4a6c3a786a 100644
--- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
+++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
@@ -46,6 +46,8 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
         CompareLessU32,
         CompareNotEqual,
         ConditionalSelect,
+        ConvertFP32ToFP64,
+        ConvertFP64ToFP32,
         ConvertFPToS32,
         ConvertFPToU32,
         ConvertS32ToFP,
@@ -122,9 +124,10 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
 
         Count,
 
-        FP = 1 << 16,
+        FP32 = 1 << 16,
+        FP64 = 1 << 17,
 
-        MrShift = 17,
+        MrShift = 18,
 
         MrGlobal  = 0 << MrShift,
         MrShared  = 1 << MrShift,
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
index 0482c35eea..3fcc5f1189 100644
--- a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
+++ b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
@@ -49,7 +49,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
             Add(Instruction.BitwiseOr,                VariableType.Int,    VariableType.Int,    VariableType.Int);
             Add(Instruction.BranchIfTrue,             VariableType.None,   VariableType.Bool);
             Add(Instruction.BranchIfFalse,            VariableType.None,   VariableType.Bool);
-            Add(Instruction.Ceiling,                  VariableType.F32,    VariableType.F32,    VariableType.F32);
+            Add(Instruction.Ceiling,                  VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
             Add(Instruction.Clamp,                    VariableType.Scalar, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
             Add(Instruction.ClampU32,                 VariableType.U32,    VariableType.U32,    VariableType.U32,    VariableType.U32);
             Add(Instruction.CompareEqual,             VariableType.Bool,   VariableType.Scalar, VariableType.Scalar);
@@ -63,6 +63,8 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
             Add(Instruction.CompareLessU32,           VariableType.Bool,   VariableType.U32,    VariableType.U32);
             Add(Instruction.CompareNotEqual,          VariableType.Bool,   VariableType.Scalar, VariableType.Scalar);
             Add(Instruction.ConditionalSelect,        VariableType.Scalar, VariableType.Bool,   VariableType.Scalar, VariableType.Scalar);
+            Add(Instruction.ConvertFP32ToFP64,        VariableType.F64,    VariableType.F32);
+            Add(Instruction.ConvertFP64ToFP32,        VariableType.F32,    VariableType.F64);
             Add(Instruction.ConvertFPToS32,           VariableType.S32,    VariableType.F32);
             Add(Instruction.ConvertFPToU32,           VariableType.U32,    VariableType.F32);
             Add(Instruction.ConvertS32ToFP,           VariableType.F32,    VariableType.S32);
@@ -74,8 +76,8 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
             Add(Instruction.ExponentB2,               VariableType.Scalar, VariableType.Scalar);
             Add(Instruction.FindFirstSetS32,          VariableType.S32,    VariableType.S32);
             Add(Instruction.FindFirstSetU32,          VariableType.S32,    VariableType.U32);
-            Add(Instruction.Floor,                    VariableType.F32,    VariableType.F32);
-            Add(Instruction.FusedMultiplyAdd,         VariableType.F32,    VariableType.F32,    VariableType.F32,    VariableType.F32);
+            Add(Instruction.Floor,                    VariableType.Scalar, VariableType.Scalar);
+            Add(Instruction.FusedMultiplyAdd,         VariableType.Scalar, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
             Add(Instruction.ImageLoad,                VariableType.F32);
             Add(Instruction.ImageStore,               VariableType.None);
             Add(Instruction.IsNan,                    VariableType.Bool,   VariableType.F32);
@@ -106,9 +108,10 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
             Add(Instruction.MultiplyHighS32,          VariableType.S32,    VariableType.S32,    VariableType.S32);
             Add(Instruction.MultiplyHighU32,          VariableType.U32,    VariableType.U32,    VariableType.U32);
             Add(Instruction.Negate,                   VariableType.Scalar, VariableType.Scalar);
+            Add(Instruction.PackDouble2x32,           VariableType.F64,    VariableType.U32,    VariableType.U32);
             Add(Instruction.PackHalf2x16,             VariableType.U32,    VariableType.F32,    VariableType.F32);
             Add(Instruction.ReciprocalSquareRoot,     VariableType.Scalar, VariableType.Scalar);
-            Add(Instruction.Round,                    VariableType.F32,    VariableType.F32);
+            Add(Instruction.Round,                    VariableType.Scalar, VariableType.Scalar);
             Add(Instruction.Sine,                     VariableType.Scalar, VariableType.Scalar);
             Add(Instruction.SquareRoot,               VariableType.Scalar, VariableType.Scalar);
             Add(Instruction.StoreGlobal,              VariableType.None,   VariableType.S32,    VariableType.S32,    VariableType.U32);
@@ -119,7 +122,8 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
             Add(Instruction.SwizzleAdd,               VariableType.F32,    VariableType.F32,    VariableType.F32,    VariableType.S32);
             Add(Instruction.TextureSample,            VariableType.F32);
             Add(Instruction.TextureSize,              VariableType.S32,    VariableType.S32,    VariableType.S32);
-            Add(Instruction.Truncate,                 VariableType.F32,    VariableType.F32);
+            Add(Instruction.Truncate,                 VariableType.Scalar, VariableType.Scalar);
+            Add(Instruction.UnpackDouble2x32,         VariableType.U32,    VariableType.F64);
             Add(Instruction.UnpackHalf2x16,           VariableType.F32,    VariableType.U32);
             Add(Instruction.VoteAll,                  VariableType.Bool,   VariableType.Bool);
             Add(Instruction.VoteAllEqual,             VariableType.Bool,   VariableType.Bool);
@@ -155,9 +159,18 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
         {
             if (type == VariableType.Scalar)
             {
-                return (inst & Instruction.FP) != 0
-                    ? VariableType.F32
-                    : VariableType.S32;
+                if ((inst & Instruction.FP32) != 0)
+                {
+                    return VariableType.F32;
+                }
+                else if ((inst & Instruction.FP64) != 0)
+                {
+                    return VariableType.F64;
+                }
+                else
+                {
+                    return VariableType.S32;
+                }
             }
             else if (type == VariableType.Int)
             {
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
index 504dc38676..4758b08fac 100644
--- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
+++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
@@ -104,7 +104,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
 
                     if (isCondSel && type == VariableType.F32)
                     {
-                        inst |= Instruction.FP;
+                        inst |= Instruction.FP32;
                     }
 
                     dest.VarType = type;
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/VariableType.cs b/Ryujinx.Graphics.Shader/StructuredIr/VariableType.cs
index 4c7f384978..0afafb2b7f 100644
--- a/Ryujinx.Graphics.Shader/StructuredIr/VariableType.cs
+++ b/Ryujinx.Graphics.Shader/StructuredIr/VariableType.cs
@@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
         Scalar,
         Int,
         F32,
+        F64,
         S32,
         U32
     }
diff --git a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
index 14675a55d6..c8d622b2c8 100644
--- a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
+++ b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
@@ -181,44 +181,54 @@ namespace Ryujinx.Graphics.Shader.Translation
             return context.Add(Instruction.FindFirstSetU32, Local(), a);
         }
 
-        public static Operand FPAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg)
+        public static Operand FP32ConvertToFP64(this EmitterContext context, Operand a)
         {
-            return context.FPNegate(context.FPAbsolute(a, abs), neg);
+            return context.Add(Instruction.ConvertFP32ToFP64, Local(), a);
         }
 
-        public static Operand FPAbsolute(this EmitterContext context, Operand a, bool abs)
+        public static Operand FP64ConvertToFP32(this EmitterContext context, Operand a)
+        {
+            return context.Add(Instruction.ConvertFP64ToFP32, Local(), a);
+        }
+
+        public static Operand FPAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg, Instruction fpType = Instruction.FP32)
+        {
+            return context.FPNegate(context.FPAbsolute(a, abs, fpType), neg, fpType);
+        }
+
+        public static Operand FPAbsolute(this EmitterContext context, Operand a, bool abs, Instruction fpType = Instruction.FP32)
         {
             if (abs)
             {
-                a = context.FPAbsolute(a);
+                a = context.FPAbsolute(a, fpType);
             }
 
             return a;
         }
 
-        public static Operand FPAbsolute(this EmitterContext context, Operand a)
+        public static Operand FPAbsolute(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.FP | Instruction.Absolute, Local(), a);
+            return context.Add(fpType | Instruction.Absolute, Local(), a);
         }
 
-        public static Operand FPAdd(this EmitterContext context, Operand a, Operand b)
+        public static Operand FPAdd(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.FP | Instruction.Add, Local(), a, b);
+            return context.Add(fpType | Instruction.Add, Local(), a, b);
         }
 
-        public static Operand FPCeiling(this EmitterContext context, Operand a)
+        public static Operand FPCeiling(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.FP | Instruction.Ceiling, Local(), a);
+            return context.Add(fpType | Instruction.Ceiling, Local(), a);
         }
 
-        public static Operand FPCompareEqual(this EmitterContext context, Operand a, Operand b)
+        public static Operand FPCompareEqual(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.FP | Instruction.CompareEqual, Local(), a, b);
+            return context.Add(fpType | Instruction.CompareEqual, Local(), a, b);
         }
 
-        public static Operand FPCompareLess(this EmitterContext context, Operand a, Operand b)
+        public static Operand FPCompareLess(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.FP | Instruction.CompareLess, Local(), a, b);
+            return context.Add(fpType | Instruction.CompareLess, Local(), a, b);
         }
 
         public static Operand FPConvertToS32(this EmitterContext context, Operand a)
@@ -233,62 +243,62 @@ namespace Ryujinx.Graphics.Shader.Translation
 
         public static Operand FPCosine(this EmitterContext context, Operand a)
         {
-            return context.Add(Instruction.FP | Instruction.Cosine, Local(), a);
+            return context.Add(Instruction.FP32 | Instruction.Cosine, Local(), a);
         }
 
         public static Operand FPDivide(this EmitterContext context, Operand a, Operand b)
         {
-            return context.Add(Instruction.FP | Instruction.Divide, Local(), a, b);
+            return context.Add(Instruction.FP32 | Instruction.Divide, Local(), a, b);
         }
 
         public static Operand FPExponentB2(this EmitterContext context, Operand a)
         {
-            return context.Add(Instruction.FP | Instruction.ExponentB2, Local(), a);
+            return context.Add(Instruction.FP32 | Instruction.ExponentB2, Local(), a);
         }
 
-        public static Operand FPFloor(this EmitterContext context, Operand a)
+        public static Operand FPFloor(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.FP | Instruction.Floor, Local(), a);
+            return context.Add(fpType | Instruction.Floor, Local(), a);
         }
 
-        public static Operand FPFusedMultiplyAdd(this EmitterContext context, Operand a, Operand b, Operand c)
+        public static Operand FPFusedMultiplyAdd(this EmitterContext context, Operand a, Operand b, Operand c, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.FusedMultiplyAdd, Local(), a, b, c);
+            return context.Add(fpType | Instruction.FusedMultiplyAdd, Local(), a, b, c);
         }
 
         public static Operand FPLogarithmB2(this EmitterContext context, Operand a)
         {
-            return context.Add(Instruction.FP | Instruction.LogarithmB2, Local(), a);
+            return context.Add(Instruction.FP32 | Instruction.LogarithmB2, Local(), a);
         }
 
         public static Operand FPMaximum(this EmitterContext context, Operand a, Operand b)
         {
-            return context.Add(Instruction.FP | Instruction.Maximum, Local(), a, b);
+            return context.Add(Instruction.FP32 | Instruction.Maximum, Local(), a, b);
         }
 
         public static Operand FPMinimum(this EmitterContext context, Operand a, Operand b)
         {
-            return context.Add(Instruction.FP | Instruction.Minimum, Local(), a, b);
+            return context.Add(Instruction.FP32 | Instruction.Minimum, Local(), a, b);
         }
 
-        public static Operand FPMultiply(this EmitterContext context, Operand a, Operand b)
+        public static Operand FPMultiply(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.FP | Instruction.Multiply, Local(), a, b);
+            return context.Add(fpType | Instruction.Multiply, Local(), a, b);
         }
 
-        public static Operand FPNegate(this EmitterContext context, Operand a, bool neg)
+        public static Operand FPNegate(this EmitterContext context, Operand a, bool neg, Instruction fpType = Instruction.FP32)
         {
             if (neg)
             {
-                a = context.FPNegate(a);
+                a = context.FPNegate(a, fpType);
             }
 
             return a;
         }
 
-        public static Operand FPNegate(this EmitterContext context, Operand a)
+        public static Operand FPNegate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.FP | Instruction.Negate, Local(), a);
+            return context.Add(fpType | Instruction.Negate, Local(), a);
         }
 
         public static Operand FPReciprocal(this EmitterContext context, Operand a)
@@ -298,42 +308,42 @@ namespace Ryujinx.Graphics.Shader.Translation
 
         public static Operand FPReciprocalSquareRoot(this EmitterContext context, Operand a)
         {
-            return context.Add(Instruction.FP | Instruction.ReciprocalSquareRoot, Local(), a);
+            return context.Add(Instruction.FP32 | Instruction.ReciprocalSquareRoot, Local(), a);
         }
 
-        public static Operand FPRound(this EmitterContext context, Operand a)
+        public static Operand FPRound(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.FP | Instruction.Round, Local(), a);
+            return context.Add(fpType | Instruction.Round, Local(), a);
         }
 
-        public static Operand FPSaturate(this EmitterContext context, Operand a, bool sat)
+        public static Operand FPSaturate(this EmitterContext context, Operand a, bool sat, Instruction fpType = Instruction.FP32)
         {
             if (sat)
             {
-                a = context.FPSaturate(a);
+                a = context.FPSaturate(a, fpType);
             }
 
             return a;
         }
 
-        public static Operand FPSaturate(this EmitterContext context, Operand a)
+        public static Operand FPSaturate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.FP | Instruction.Clamp, Local(), a, ConstF(0), ConstF(1));
+            return fpType == Instruction.FP64 ? context.Add(fpType | Instruction.Clamp, Local(), a, context.PackDouble2x32(Const(0), Const(0)), context.PackDouble2x32(Const(0), Const(0x3FF00000))) : context.Add(fpType | Instruction.Clamp, Local(), a, ConstF(0), ConstF(1)); // FP64 needs double bounds: 0.0 and 1.0 packed from their (low, high) 32-bit halves; ConstF would supply float bit patterns.
         }
 
         public static Operand FPSine(this EmitterContext context, Operand a)
         {
-            return context.Add(Instruction.FP | Instruction.Sine, Local(), a);
+            return context.Add(Instruction.FP32 | Instruction.Sine, Local(), a);
         }
 
         public static Operand FPSquareRoot(this EmitterContext context, Operand a)
         {
-            return context.Add(Instruction.FP | Instruction.SquareRoot, Local(), a);
+            return context.Add(Instruction.FP32 | Instruction.SquareRoot, Local(), a);
         }
 
-        public static Operand FPTruncate(this EmitterContext context, Operand a)
+        public static Operand FPTruncate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
         {
-            return context.Add(Instruction.Truncate, Local(), a);
+            return context.Add(fpType | Instruction.Truncate, Local(), a);
         }
 
         public static Operand FPSwizzleAdd(this EmitterContext context, Operand a, Operand b, int mask)
@@ -501,6 +511,11 @@ namespace Ryujinx.Graphics.Shader.Translation
             return context.Add(Instruction.MultiplyHighU32, Local(), a, b);
         }
 
+        public static Operand PackDouble2x32(this EmitterContext context, Operand a, Operand b)
+        {
+            return context.Add(Instruction.PackDouble2x32, Local(), a, b);
+        }
+
         public static Operand PackHalf2x16(this EmitterContext context, Operand a, Operand b)
         {
             return context.Add(Instruction.PackHalf2x16, Local(), a, b);
@@ -563,6 +578,25 @@ namespace Ryujinx.Graphics.Shader.Translation
             return context.Add(Instruction.StoreShared, null, a, b);
         }
 
+        public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a)
+        {
+            return UnpackDouble2x32(context, a, 1);
+        }
+
+        public static Operand UnpackDouble2x32Low(this EmitterContext context, Operand a)
+        {
+            return UnpackDouble2x32(context, a, 0);
+        }
+
+        private static Operand UnpackDouble2x32(this EmitterContext context, Operand a, int index)
+        {
+            Operand dest = Local();
+
+            context.Add(new Operation(Instruction.UnpackDouble2x32, index, dest, a));
+
+            return dest;
+        }
+
         public static Operand UnpackHalf2x16High(this EmitterContext context, Operand a)
         {
             return UnpackHalf2x16(context, a, 1);
diff --git a/Ryujinx.Graphics.Shader/Translation/Lowering.cs b/Ryujinx.Graphics.Shader/Translation/Lowering.cs
index 99aea26e68..0b5ec59273 100644
--- a/Ryujinx.Graphics.Shader/Translation/Lowering.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Lowering.cs
@@ -304,7 +304,7 @@ namespace Ryujinx.Graphics.Shader.Translation
 
                     Operand coordNormalized = Local();
 
-                    node.List.AddBefore(node, new Operation(Instruction.FP | Instruction.Divide, coordNormalized, source, Float(coordSize)));
+                    node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, source, Float(coordSize)));
 
                     sources[coordsIndex + index] = coordNormalized;
                 }
@@ -375,13 +375,13 @@ namespace Ryujinx.Graphics.Shader.Translation
 
                         Operand intOffset = offsets[index + (hasOffsets ? texOp.Index * coordsCount : 0)];
 
-                        node.List.AddBefore(node, new Operation(Instruction.FP | Instruction.Divide, offset, Float(intOffset), Float(coordSize)));
+                        node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(coordSize)));
 
                         Operand source = sources[coordsIndex + index];
 
                         Operand coordPlusOffset = Local();
 
-                        node.List.AddBefore(node, new Operation(Instruction.FP | Instruction.Add, coordPlusOffset, source, offset));
+                        node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
 
                         sources[coordsIndex + index] = coordPlusOffset;
                     }
diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
index b69589294d..856a5598d6 100644
--- a/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
@@ -101,51 +101,51 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
                     EvaluateBinary(operation, (x, y) => y != 0 ? x / y : 0);
                     break;
 
-                case Instruction.FP | Instruction.Add:
+                case Instruction.FP32 | Instruction.Add:
                     EvaluateFPBinary(operation, (x, y) => x + y);
                     break;
 
-                case Instruction.FP | Instruction.Clamp:
+                case Instruction.FP32 | Instruction.Clamp:
                     EvaluateFPTernary(operation, (x, y, z) => Math.Clamp(x, y, z));
                     break;
 
-                case Instruction.FP | Instruction.CompareEqual:
+                case Instruction.FP32 | Instruction.CompareEqual:
                     EvaluateFPBinary(operation, (x, y) => x == y);
                     break;
 
-                case Instruction.FP | Instruction.CompareGreater:
+                case Instruction.FP32 | Instruction.CompareGreater:
                     EvaluateFPBinary(operation, (x, y) => x > y);
                     break;
 
-                case Instruction.FP | Instruction.CompareGreaterOrEqual:
+                case Instruction.FP32 | Instruction.CompareGreaterOrEqual:
                     EvaluateFPBinary(operation, (x, y) => x >= y);
                     break;
 
-                case Instruction.FP | Instruction.CompareLess:
+                case Instruction.FP32 | Instruction.CompareLess:
                     EvaluateFPBinary(operation, (x, y) => x < y);
                     break;
 
-                case Instruction.FP | Instruction.CompareLessOrEqual:
+                case Instruction.FP32 | Instruction.CompareLessOrEqual:
                     EvaluateFPBinary(operation, (x, y) => x <= y);
                     break;
 
-                case Instruction.FP | Instruction.CompareNotEqual:
+                case Instruction.FP32 | Instruction.CompareNotEqual:
                     EvaluateFPBinary(operation, (x, y) => x != y);
                     break;
 
-                case Instruction.FP | Instruction.Divide:
+                case Instruction.FP32 | Instruction.Divide:
                     EvaluateFPBinary(operation, (x, y) => x / y);
                     break;
 
-                case Instruction.FP | Instruction.Multiply:
+                case Instruction.FP32 | Instruction.Multiply:
                     EvaluateFPBinary(operation, (x, y) => x * y);
                     break;
 
-                case Instruction.FP | Instruction.Negate:
+                case Instruction.FP32 | Instruction.Negate:
                     EvaluateFPUnary(operation, (x) => -x);
                     break;
 
-                case Instruction.FP | Instruction.Subtract:
+                case Instruction.FP32 | Instruction.Subtract:
                     EvaluateFPBinary(operation, (x, y) => x - y);
                     break;