From 8b44eb1c981d7106be37107755c7c71c3c3c0ce4 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Wed, 7 Jul 2021 20:56:06 -0300
Subject: [PATCH] Separate GPU engines and make state follow official docs
 (part 1/2) (#2422)

* Use DeviceState for compute and i2m

* Migrate 2D class, more comments

* Migrate DMA copy engine

* Remove now unused code

* Replace GpuState by GpuAccessorState on GpuAccessor, since compute no longer has a GpuState

* More comments

* Add logging (disabled)

* Add back i2m on 3D engine
---
 Ryujinx.Common/Memory/StructArrayHelpers.cs   |   1 -
 Ryujinx.Graphics.Device/DeviceState.cs        |  21 +-
 Ryujinx.Graphics.Gpu/Engine/Compute.cs        | 159 ----
 .../Engine/Compute/ComputeClass.cs            | 213 +++++
 .../Engine/Compute/ComputeClassState.cs       | 435 +++++++++
 .../Engine/{ => Compute}/ComputeQmd.cs        |   2 +-
 .../{MethodCopyBuffer.cs => Dma/DmaClass.cs}  | 164 ++--
 .../Engine/Dma/DmaClassState.cs               | 271 ++++++
 .../Engine/GPFifo/GPFifoClassState.cs         |  45 +
 .../Engine/GPFifo/GPFifoProcessor.cs          |  21 +-
 .../InlineToMemory/InlineToMemoryClass.cs     | 211 +++++
 .../InlineToMemoryClassState.cs               | 181 ++++
 .../Engine/MethodUniformBufferUpdate.cs       |   2 +-
 Ryujinx.Graphics.Gpu/Engine/Methods.cs        |  31 +-
 .../Engine/MmeShadowScratch.cs                |  15 +
 .../TwodClass.cs}                             |  88 +-
 .../Engine/Twod/TwodClassState.cs             | 827 ++++++++++++++++++
 .../Image/TextureBindingsManager.cs           |  20 +-
 Ryujinx.Graphics.Gpu/Image/TextureCache.cs    |  25 +-
 Ryujinx.Graphics.Gpu/Image/TextureManager.cs  |  22 +-
 Ryujinx.Graphics.Gpu/Memory/BufferCache.cs    |  10 +-
 Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs    |  38 +-
 .../Shader/GpuAccessorState.cs                |  43 +
 Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs    |  31 +-
 .../State/BufferSwizzleComponent.cs           |  16 -
 .../State/CopyBufferParams.cs                 |  17 -
 .../State/CopyBufferSwizzle.cs                |  75 --
 .../State/CopyTextureControl.cs               |  22 -
 Ryujinx.Graphics.Gpu/State/GpuState.cs        |  33 +-
 Ryujinx.Graphics.Gpu/State/SbDescriptor.cs    |  20 +
 30 files changed, 2599 insertions(+), 460 deletions(-)
 delete mode 100644 Ryujinx.Graphics.Gpu/Engine/Compute.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs
 rename Ryujinx.Graphics.Gpu/Engine/{ => Compute}/ComputeQmd.cs (99%)
 rename Ryujinx.Graphics.Gpu/Engine/{MethodCopyBuffer.cs => Dma/DmaClass.cs} (51%)
 create mode 100644 Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs
 rename Ryujinx.Graphics.Gpu/Engine/{MethodCopyTexture.cs => Twod/TwodClass.cs} (57%)
 create mode 100644 Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs
 delete mode 100644 Ryujinx.Graphics.Gpu/State/BufferSwizzleComponent.cs
 delete mode 100644 Ryujinx.Graphics.Gpu/State/CopyBufferParams.cs
 delete mode 100644 Ryujinx.Graphics.Gpu/State/CopyBufferSwizzle.cs
 delete mode 100644 Ryujinx.Graphics.Gpu/State/CopyTextureControl.cs
 create mode 100644 Ryujinx.Graphics.Gpu/State/SbDescriptor.cs

diff --git a/Ryujinx.Common/Memory/StructArrayHelpers.cs b/Ryujinx.Common/Memory/StructArrayHelpers.cs
index 1498ac4717..fbb2902d5c 100644
--- a/Ryujinx.Common/Memory/StructArrayHelpers.cs
+++ b/Ryujinx.Common/Memory/StructArrayHelpers.cs
@@ -640,5 +640,4 @@ namespace Ryujinx.Common.Memory
         public ref T this[int index] => ref ToSpan()[index];
         public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 64);
     }
-
 }
diff --git a/Ryujinx.Graphics.Device/DeviceState.cs b/Ryujinx.Graphics.Device/DeviceState.cs
index 740d8589c8..1001d29500 100644
--- a/Ryujinx.Graphics.Device/DeviceState.cs
+++ b/Ryujinx.Graphics.Device/DeviceState.cs
@@ -20,7 +20,10 @@ namespace Ryujinx.Graphics.Device
         private readonly Dictionary<int, Func<int>> _readCallbacks;
         private readonly Dictionary<int, Action<int>> _writeCallbacks;
 
-        public DeviceState(IReadOnlyDictionary<string, RwCallback> callbacks = null)
+        private readonly Dictionary<int, string> _fieldNamesForDebug;
+        private readonly Action<string> _debugLogCallback;
+
+        public DeviceState(IReadOnlyDictionary<string, RwCallback> callbacks = null, Action<string> debugLogCallback = null)
         {
             int size = (Unsafe.SizeOf<TState>() + RegisterSize - 1) / RegisterSize;
 
@@ -30,6 +33,12 @@ namespace Ryujinx.Graphics.Device
             _readCallbacks = new Dictionary<int, Func<int>>();
             _writeCallbacks = new Dictionary<int, Action<int>>();
 
+            if (debugLogCallback != null)
+            {
+                _fieldNamesForDebug = new Dictionary<int, string>();
+                _debugLogCallback = debugLogCallback;
+            }
+
             var fields = typeof(TState).GetFields();
             int offset = 0;
 
@@ -59,6 +68,11 @@ namespace Ryujinx.Graphics.Device
                     }
                 }
 
+                if (debugLogCallback != null)
+                {
+                    _fieldNamesForDebug.Add(offset, field.Name);
+                }
+
                 offset += sizeOfField;
             }
 
@@ -90,6 +104,11 @@ namespace Ryujinx.Graphics.Device
             {
                 int alignedOffset = Align(offset);
 
+                if (_fieldNamesForDebug != null && _fieldNamesForDebug.TryGetValue(alignedOffset, out string fieldName))
+                {
+                    _debugLogCallback($"{typeof(TState).Name}.{fieldName} = 0x{data:X}");
+                }
+
                 GetRef<int>(alignedOffset) = data;
 
                 if (_writeCallbacks.TryGetValue(alignedOffset, out Action<int> write))
diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute.cs b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
deleted file mode 100644
index 26041ecc59..0000000000
--- a/Ryujinx.Graphics.Gpu/Engine/Compute.cs
+++ /dev/null
@@ -1,159 +0,0 @@
-using Ryujinx.Graphics.GAL;
-using Ryujinx.Graphics.Gpu.Image;
-using Ryujinx.Graphics.Gpu.Shader;
-using Ryujinx.Graphics.Gpu.State;
-using Ryujinx.Graphics.Shader;
-using System;
-
-namespace Ryujinx.Graphics.Gpu.Engine
-{
-    partial class Methods
-    {
-        /// <summary>
-        /// Dispatches compute work.
-        /// </summary>
-        /// <param name="state">Current GPU state</param>
-        /// <param name="argument">Method call argument</param>
-        public void Dispatch(GpuState state, int argument)
-        {
-            var memoryManager = state.Channel.MemoryManager;
-
-            FlushUboDirty(memoryManager);
-
-            uint qmdAddress = (uint)state.Get<int>(MethodOffset.DispatchParamsAddress);
-
-            var qmd = state.Channel.MemoryManager.Read<ComputeQmd>((ulong)qmdAddress << 8);
-
-            GpuVa shaderBaseAddress = state.Get<GpuVa>(MethodOffset.ShaderBaseAddress);
-
-            ulong shaderGpuVa = shaderBaseAddress.Pack() + (uint)qmd.ProgramOffset;
-
-            int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;
-
-            int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);
-
-            for (int index = 0; index < Constants.TotalCpUniformBuffers; index++)
-            {
-                if (!qmd.ConstantBufferValid(index))
-                {
-                    continue;
-                }
-
-                ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32;
-                ulong size = (ulong)qmd.ConstantBufferSize(index);
-
-                state.Channel.BufferManager.SetComputeUniformBuffer(index, gpuVa, size);
-            }
-
-            ShaderBundle cs = memoryManager.Physical.ShaderCache.GetComputeShader(
-                state,
-                shaderGpuVa,
-                qmd.CtaThreadDimension0,
-                qmd.CtaThreadDimension1,
-                qmd.CtaThreadDimension2,
-                localMemorySize,
-                sharedMemorySize);
-
-            _context.Renderer.Pipeline.SetProgram(cs.HostProgram);
-
-            var samplerPool = state.Get<PoolState>(MethodOffset.SamplerPoolState);
-            var texturePool = state.Get<PoolState>(MethodOffset.TexturePoolState);
-
-            state.Channel.TextureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, qmd.SamplerIndex);
-            state.Channel.TextureManager.SetComputeTexturePool(texturePool.Address.Pack(), texturePool.MaximumId);
-            state.Channel.TextureManager.SetComputeTextureBufferIndex(state.Get<int>(MethodOffset.TextureBufferIndex));
-
-            ShaderProgramInfo info = cs.Shaders[0].Info;
-
-            for (int index = 0; index < info.CBuffers.Count; index++)
-            {
-                BufferDescriptor cb = info.CBuffers[index];
-
-                // NVN uses the "hardware" constant buffer for anything that is less than 8,
-                // and those are already bound above.
-                // Anything greater than or equal to 8 uses the emulated constant buffers.
-                // They are emulated using global memory loads.
-                if (cb.Slot < 8)
-                {
-                    continue;
-                }
-
-                ulong cbDescAddress = state.Channel.BufferManager.GetComputeUniformBufferAddress(0);
-
-                int cbDescOffset = 0x260 + (cb.Slot - 8) * 0x10;
-
-                cbDescAddress += (ulong)cbDescOffset;
-
-                SbDescriptor cbDescriptor = state.Channel.MemoryManager.Physical.Read<SbDescriptor>(cbDescAddress);
-
-                state.Channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
-            }
-
-            for (int index = 0; index < info.SBuffers.Count; index++)
-            {
-                BufferDescriptor sb = info.SBuffers[index];
-
-                ulong sbDescAddress = state.Channel.BufferManager.GetComputeUniformBufferAddress(0);
-
-                int sbDescOffset = 0x310 + sb.Slot * 0x10;
-
-                sbDescAddress += (ulong)sbDescOffset;
-
-                SbDescriptor sbDescriptor = state.Channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
-
-                state.Channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
-            }
-
-            state.Channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers);
-            state.Channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers);
-
-            var textureBindings = new TextureBindingInfo[info.Textures.Count];
-
-            for (int index = 0; index < info.Textures.Count; index++)
-            {
-                var descriptor = info.Textures[index];
-
-                Target target = ShaderTexture.GetTarget(descriptor.Type);
-
-                textureBindings[index] = new TextureBindingInfo(
-                    target,
-                    descriptor.Binding,
-                    descriptor.CbufSlot,
-                    descriptor.HandleIndex,
-                    descriptor.Flags);
-            }
-
-            state.Channel.TextureManager.SetComputeTextures(textureBindings);
-
-            var imageBindings = new TextureBindingInfo[info.Images.Count];
-
-            for (int index = 0; index < info.Images.Count; index++)
-            {
-                var descriptor = info.Images[index];
-
-                Target target = ShaderTexture.GetTarget(descriptor.Type);
-                Format format = ShaderTexture.GetFormat(descriptor.Format);
-
-                imageBindings[index] = new TextureBindingInfo(
-                    target,
-                    format,
-                    descriptor.Binding,
-                    descriptor.CbufSlot,
-                    descriptor.HandleIndex,
-                    descriptor.Flags);
-            }
-
-            state.Channel.TextureManager.SetComputeImages(imageBindings);
-
-            state.Channel.TextureManager.CommitComputeBindings();
-            state.Channel.BufferManager.CommitComputeBindings();
-
-            _context.Renderer.Pipeline.DispatchCompute(
-                qmd.CtaRasterWidth,
-                qmd.CtaRasterHeight,
-                qmd.CtaRasterDepth);
-
-            _forceShaderUpdate = true;
-        }
-    }
-}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
new file mode 100644
index 0000000000..5d1fd2c0d3
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
@@ -0,0 +1,213 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Gpu.Shader;
+using Ryujinx.Graphics.Gpu.State;
+using Ryujinx.Graphics.Shader;
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
+{
+    /// <summary>
+    /// Represents a compute engine class, which also handles Inline-to-Memory methods through its <see cref="InlineToMemoryClass"/> base.
+    /// </summary>
+    class ComputeClass : InlineToMemoryClass, IDeviceState
+    {
+        private readonly GpuContext _context;
+        private readonly GpuChannel _channel;
+        private readonly DeviceState<ComputeClassState> _state;
+
+        /// <summary>
+        /// Creates a new instance of the compute engine class, hooking the LaunchDma, LoadInlineData and SendSignalingPcasB registers.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="channel">GPU channel</param>
+        public ComputeClass(GpuContext context, GpuChannel channel) : base(context, channel, false)
+        {
+            _context = context;
+            _channel = channel;
+            _state = new DeviceState<ComputeClassState>(new Dictionary<string, RwCallback>
+            {
+                { nameof(ComputeClassState.LaunchDma), new RwCallback(LaunchDma, null) },
+                { nameof(ComputeClassState.LoadInlineData), new RwCallback(LoadInlineData, null) },
+                { nameof(ComputeClassState.SendSignalingPcasB), new RwCallback(SendSignalingPcasB, null) }
+            });
+        }
+
+        /// <summary>
+        /// Reads data from the compute class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <returns>Data at the specified offset</returns>
+        public override int Read(int offset) => _state.Read(offset);
+
+        /// <summary>
+        /// Writes data to the compute class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <param name="data">Data to be written</param>
+        public override void Write(int offset, int data) => _state.Write(offset, data);
+
+        /// <summary>
+        /// Launches the Inline-to-Memory DMA copy operation, reinterpreting the compute state as Inline-to-Memory state for the base class.
+        /// </summary>
+        /// <param name="argument">Method call argument, forwarded to the base implementation</param>
+        protected override void LaunchDma(int argument)
+        {
+            LaunchDma(ref Unsafe.As<ComputeClassState, InlineToMemoryClassState>(ref _state.State), argument);
+        }
+
+        /// <summary>
+        /// Performs the compute dispatch described by the QMD whose GPU address (in 256-byte units) is held in SendPcasA.
+        /// </summary>
+        /// <param name="argument">Method call argument (not used by this method)</param>
+        private void SendSignalingPcasB(int argument)
+        {
+            var memoryManager = _channel.MemoryManager;
+
+            _context.Methods.FlushUboDirty(memoryManager);
+
+            uint qmdAddress = _state.State.SendPcasA;
+
+            var qmd = _channel.MemoryManager.Read<ComputeQmd>((ulong)qmdAddress << 8);
+
+            ulong shaderGpuVa = ((ulong)_state.State.SetProgramRegionAAddressUpper << 32) | _state.State.SetProgramRegionB;
+
+            shaderGpuVa += (uint)qmd.ProgramOffset;
+
+            int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;
+
+            int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);
+
+            for (int index = 0; index < Constants.TotalCpUniformBuffers; index++)
+            {
+                if (!qmd.ConstantBufferValid(index))
+                {
+                    continue;
+                }
+
+                ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32;
+                ulong size = (ulong)qmd.ConstantBufferSize(index);
+
+                _channel.BufferManager.SetComputeUniformBuffer(index, gpuVa, size);
+            }
+
+            ulong samplerPoolGpuVa = ((ulong)_state.State.SetTexSamplerPoolAOffsetUpper << 32) | _state.State.SetTexSamplerPoolB;
+            ulong texturePoolGpuVa = ((ulong)_state.State.SetTexHeaderPoolAOffsetUpper << 32) | _state.State.SetTexHeaderPoolB;
+
+            GpuAccessorState gas = new GpuAccessorState(
+                texturePoolGpuVa,
+                _state.State.SetTexHeaderPoolCMaximumIndex,
+                _state.State.SetBindlessTextureConstantBufferSlotSelect,
+                false);
+
+            ShaderBundle cs = memoryManager.Physical.ShaderCache.GetComputeShader(
+                _channel,
+                gas,
+                shaderGpuVa,
+                qmd.CtaThreadDimension0,
+                qmd.CtaThreadDimension1,
+                qmd.CtaThreadDimension2,
+                localMemorySize,
+                sharedMemorySize);
+
+            _context.Renderer.Pipeline.SetProgram(cs.HostProgram);
+
+            _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, _state.State.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex);
+            _channel.TextureManager.SetComputeTexturePool(texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex);
+            _channel.TextureManager.SetComputeTextureBufferIndex(_state.State.SetBindlessTextureConstantBufferSlotSelect);
+
+            ShaderProgramInfo info = cs.Shaders[0].Info;
+
+            for (int index = 0; index < info.CBuffers.Count; index++)
+            {
+                BufferDescriptor cb = info.CBuffers[index];
+
+                // NVN uses the "hardware" constant buffer for anything that is less than 8,
+                // and those are already bound above.
+                // Anything greater than or equal to 8 uses the emulated constant buffers.
+                // They are emulated using global memory loads.
+                if (cb.Slot < 8)
+                {
+                    continue;
+                }
+
+                ulong cbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
+
+                int cbDescOffset = 0x260 + (cb.Slot - 8) * 0x10;
+
+                cbDescAddress += (ulong)cbDescOffset;
+
+                SbDescriptor cbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(cbDescAddress);
+
+                _channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
+            }
+
+            for (int index = 0; index < info.SBuffers.Count; index++)
+            {
+                BufferDescriptor sb = info.SBuffers[index];
+
+                ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
+
+                int sbDescOffset = 0x310 + sb.Slot * 0x10;
+
+                sbDescAddress += (ulong)sbDescOffset;
+
+                SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
+
+                _channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
+            }
+
+            _channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers);
+            _channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers);
+
+            var textureBindings = new TextureBindingInfo[info.Textures.Count];
+
+            for (int index = 0; index < info.Textures.Count; index++)
+            {
+                var descriptor = info.Textures[index];
+
+                Target target = ShaderTexture.GetTarget(descriptor.Type);
+
+                textureBindings[index] = new TextureBindingInfo(
+                    target,
+                    descriptor.Binding,
+                    descriptor.CbufSlot,
+                    descriptor.HandleIndex,
+                    descriptor.Flags);
+            }
+
+            _channel.TextureManager.SetComputeTextures(textureBindings);
+
+            var imageBindings = new TextureBindingInfo[info.Images.Count];
+
+            for (int index = 0; index < info.Images.Count; index++)
+            {
+                var descriptor = info.Images[index];
+
+                Target target = ShaderTexture.GetTarget(descriptor.Type);
+                Format format = ShaderTexture.GetFormat(descriptor.Format);
+
+                imageBindings[index] = new TextureBindingInfo(
+                    target,
+                    format,
+                    descriptor.Binding,
+                    descriptor.CbufSlot,
+                    descriptor.HandleIndex,
+                    descriptor.Flags);
+            }
+
+            _channel.TextureManager.SetComputeImages(imageBindings);
+
+            _channel.TextureManager.CommitComputeBindings();
+            _channel.BufferManager.CommitComputeBindings();
+
+            _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth);
+
+            _context.Methods.ForceShaderUpdate();
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs
new file mode 100644
index 0000000000..5d81de5de6
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs
@@ -0,0 +1,435 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
+{
+    /// <summary>
+    /// Notify type, taken from the whole value of the Notify register.
+    /// </summary>
+    enum NotifyType
+    {
+        WriteOnly = 0,
+        WriteThenAwaken = 1,
+    }
+
+    /// <summary>
+    /// CWD control SM selection, decoded from bit 0 of the SetCwdControl register.
+    /// </summary>
+    enum SetCwdControlSmSelection
+    {
+        LoadBalanced = 0,
+        RoundRobin = 1,
+    }
+
+    /// <summary>
+    /// Cache lines to invalidate, decoded from bit 0 of the InvalidateTextureHeaderCacheNoWfi register.
+    /// </summary>
+    enum InvalidateCacheLines
+    {
+        All = 0,
+        One = 1,
+    }
+
+    /// <summary>
+    /// GWC SCG type, decoded from bit 0 of the SetGwcScgType register.
+    /// </summary>
+    enum SetGwcScgTypeScgType
+    {
+        GraphicsCompute0 = 0,
+        Compute1 = 1,
+    }
+
+    /// <summary>
+    /// Render enable override mode (use render enable, always render or never render).
+    /// </summary>
+    enum SetRenderEnableOverrideMode
+    {
+        UseRenderEnable = 0,
+        AlwaysRender = 1,
+        NeverRender = 2,
+    }
+
+    /// <summary>
+    /// Semaphore report operation (release or trap).
+    /// </summary>
+    enum SetReportSemaphoreDOperation
+    {
+        Release = 0,
+        Trap = 3,
+    }
+
+    /// <summary>
+    /// Semaphore report structure size (four words or one word).
+    /// </summary>
+    enum SetReportSemaphoreDStructureSize
+    {
+        FourWords = 0,
+        OneWord = 1,
+    }
+
+    /// <summary>
+    /// Semaphore report reduction operation (add, min, max, inc, dec, and, or, xor).
+    /// </summary>
+    enum SetReportSemaphoreDReductionOp
+    {
+        RedAdd = 0,
+        RedMin = 1,
+        RedMax = 2,
+        RedInc = 3,
+        RedDec = 4,
+        RedAnd = 5,
+        RedOr = 6,
+        RedXor = 7,
+    }
+
+    /// <summary>
+    /// Semaphore report reduction format (unsigned or signed 32-bit).
+    /// </summary>
+    enum SetReportSemaphoreDReductionFormat
+    {
+        Unsigned32 = 0,
+        Signed32 = 1,
+    }
+
+    /// <summary>
+    /// Compute class state.
+    /// </summary>
+    unsafe struct ComputeClassState
+    {
+#pragma warning disable CS0649
+        public uint SetObject;
+        public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF);
+        public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F);
+        public fixed uint Reserved04[63];
+        public uint NoOperation;
+        public uint SetNotifyA;
+        public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF);
+        public uint SetNotifyB;
+        public uint Notify;
+        public NotifyType NotifyType => (NotifyType)(Notify);
+        public uint WaitForIdle;
+        public fixed uint Reserved114[7];
+        public uint SetGlobalRenderEnableA;
+        public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF);
+        public uint SetGlobalRenderEnableB;
+        public uint SetGlobalRenderEnableC;
+        public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7);
+        public uint SendGoIdle;
+        public uint PmTrigger;
+        public uint PmTriggerWfi;
+        public fixed uint Reserved148[2];
+        public uint SetInstrumentationMethodHeader;
+        public uint SetInstrumentationMethodData;
+        public fixed uint Reserved158[10];
+        public uint LineLengthIn;
+        public uint LineCount;
+        public uint OffsetOutUpper;
+        public int OffsetOutUpperValue => (int)((OffsetOutUpper >> 0) & 0xFF);
+        public uint OffsetOut;
+        public uint PitchOut;
+        public uint SetDstBlockSize;
+        public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
+        public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+        public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+        public uint SetDstWidth;
+        public uint SetDstHeight;
+        public uint SetDstDepth;
+        public uint SetDstLayer;
+        public uint SetDstOriginBytesX;
+        public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF);
+        public uint SetDstOriginSamplesY;
+        public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY >> 0) & 0xFFFF);
+        public uint LaunchDma;
+        public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1);
+        public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3);
+        public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3);
+        public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1);
+        public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0;
+        public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7);
+        public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3);
+        public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0;
+        public uint LoadInlineData;
+        public fixed uint Reserved1B8[9];
+        public uint SetI2mSemaphoreA;
+        public int SetI2mSemaphoreAOffsetUpper => (int)((SetI2mSemaphoreA >> 0) & 0xFF);
+        public uint SetI2mSemaphoreB;
+        public uint SetI2mSemaphoreC;
+        public fixed uint Reserved1E8[2];
+        public uint SetI2mSpareNoop00;
+        public uint SetI2mSpareNoop01;
+        public uint SetI2mSpareNoop02;
+        public uint SetI2mSpareNoop03;
+        public uint SetValidSpanOverflowAreaA;
+        public int SetValidSpanOverflowAreaAAddressUpper => (int)((SetValidSpanOverflowAreaA >> 0) & 0xFF);
+        public uint SetValidSpanOverflowAreaB;
+        public uint SetValidSpanOverflowAreaC;
+        public uint SetCoalesceWaitingPeriodUnit;
+        public uint PerfmonTransfer;
+        public uint SetShaderSharedMemoryWindow;
+        public uint SetSelectMaxwellTextureHeaders;
+        public bool SetSelectMaxwellTextureHeadersV => (SetSelectMaxwellTextureHeaders & 0x1) != 0;
+        public uint InvalidateShaderCaches;
+        public bool InvalidateShaderCachesInstruction => (InvalidateShaderCaches & 0x1) != 0;
+        public bool InvalidateShaderCachesData => (InvalidateShaderCaches & 0x10) != 0;
+        public bool InvalidateShaderCachesConstant => (InvalidateShaderCaches & 0x1000) != 0;
+        public bool InvalidateShaderCachesLocks => (InvalidateShaderCaches & 0x2) != 0;
+        public bool InvalidateShaderCachesFlushData => (InvalidateShaderCaches & 0x4) != 0;
+        public uint SetReservedSwMethod00;
+        public uint SetReservedSwMethod01;
+        public uint SetReservedSwMethod02;
+        public uint SetReservedSwMethod03;
+        public uint SetReservedSwMethod04;
+        public uint SetReservedSwMethod05;
+        public uint SetReservedSwMethod06;
+        public uint SetReservedSwMethod07;
+        public uint SetCwdControl;
+        public SetCwdControlSmSelection SetCwdControlSmSelection => (SetCwdControlSmSelection)((SetCwdControl >> 0) & 0x1);
+        public uint InvalidateTextureHeaderCacheNoWfi;
+        public InvalidateCacheLines InvalidateTextureHeaderCacheNoWfiLines => (InvalidateCacheLines)((InvalidateTextureHeaderCacheNoWfi >> 0) & 0x1);
+        public int InvalidateTextureHeaderCacheNoWfiTag => (int)((InvalidateTextureHeaderCacheNoWfi >> 4) & 0x3FFFFF);
+        public uint SetCwdRefCounter;
+        public int SetCwdRefCounterSelect => (int)((SetCwdRefCounter >> 0) & 0x3F);
+        public int SetCwdRefCounterValue => (int)((SetCwdRefCounter >> 8) & 0xFFFF);
+        public uint SetReservedSwMethod08;
+        public uint SetReservedSwMethod09;
+        public uint SetReservedSwMethod10;
+        public uint SetReservedSwMethod11;
+        public uint SetReservedSwMethod12;
+        public uint SetReservedSwMethod13;
+        public uint SetReservedSwMethod14;
+        public uint SetReservedSwMethod15;
+        public uint SetGwcScgType;
+        public SetGwcScgTypeScgType SetGwcScgTypeScgType => (SetGwcScgTypeScgType)((SetGwcScgType >> 0) & 0x1);
+        public uint SetScgControl;
+        public int SetScgControlCompute1MaxSmCount => (int)((SetScgControl >> 0) & 0x1FF);
+        public uint InvalidateConstantBufferCacheA;
+        public int InvalidateConstantBufferCacheAAddressUpper => (int)((InvalidateConstantBufferCacheA >> 0) & 0xFF);
+        public uint InvalidateConstantBufferCacheB;
+        public uint InvalidateConstantBufferCacheC;
+        public int InvalidateConstantBufferCacheCByteCount => (int)((InvalidateConstantBufferCacheC >> 0) & 0x1FFFF);
+        public bool InvalidateConstantBufferCacheCThruL2 => (InvalidateConstantBufferCacheC & 0x80000000) != 0;
+        public uint SetComputeClassVersion;
+        public int SetComputeClassVersionCurrent => (int)((SetComputeClassVersion >> 0) & 0xFFFF);
+        public int SetComputeClassVersionOldestSupported => (int)((SetComputeClassVersion >> 16) & 0xFFFF);
+        public uint CheckComputeClassVersion;
+        public int CheckComputeClassVersionCurrent => (int)((CheckComputeClassVersion >> 0) & 0xFFFF);
+        public int CheckComputeClassVersionOldestSupported => (int)((CheckComputeClassVersion >> 16) & 0xFFFF);
+        public uint SetQmdVersion;
+        public int SetQmdVersionCurrent => (int)((SetQmdVersion >> 0) & 0xFFFF);
+        public int SetQmdVersionOldestSupported => (int)((SetQmdVersion >> 16) & 0xFFFF);
+        public uint SetWfiConfig;
+        public bool SetWfiConfigEnableScgTypeWfi => (SetWfiConfig & 0x1) != 0;
+        public uint CheckQmdVersion;
+        public int CheckQmdVersionCurrent => (int)((CheckQmdVersion >> 0) & 0xFFFF);
+        public int CheckQmdVersionOldestSupported => (int)((CheckQmdVersion >> 16) & 0xFFFF);
+        public uint WaitForIdleScgType;
+        public uint InvalidateSkedCaches;
+        public bool InvalidateSkedCachesV => (InvalidateSkedCaches & 0x1) != 0;
+        public uint SetScgRenderEnableControl;
+        public bool SetScgRenderEnableControlCompute1UsesRenderEnable => (SetScgRenderEnableControl & 0x1) != 0;
+        public fixed uint Reserved2A0[4];
+        public uint SetCwdSlotCount;
+        public int SetCwdSlotCountV => (int)((SetCwdSlotCount >> 0) & 0xFF);
+        public uint SendPcasA;
+        public uint SendPcasB;
+        public int SendPcasBFrom => (int)((SendPcasB >> 0) & 0xFFFFFF);
+        public int SendPcasBDelta => (int)((SendPcasB >> 24) & 0xFF);
+        public uint SendSignalingPcasB;
+        public bool SendSignalingPcasBInvalidate => (SendSignalingPcasB & 0x1) != 0;
+        public bool SendSignalingPcasBSchedule => (SendSignalingPcasB & 0x2) != 0;
+        public fixed uint Reserved2C0[9];
+        public uint SetShaderLocalMemoryNonThrottledA;
+        public int SetShaderLocalMemoryNonThrottledASizeUpper => (int)((SetShaderLocalMemoryNonThrottledA >> 0) & 0xFF);
+        public uint SetShaderLocalMemoryNonThrottledB;
+        public uint SetShaderLocalMemoryNonThrottledC;
+        public int SetShaderLocalMemoryNonThrottledCMaxSmCount => (int)((SetShaderLocalMemoryNonThrottledC >> 0) & 0x1FF);
+        public uint SetShaderLocalMemoryThrottledA;
+        public int SetShaderLocalMemoryThrottledASizeUpper => (int)((SetShaderLocalMemoryThrottledA >> 0) & 0xFF);
+        public uint SetShaderLocalMemoryThrottledB;
+        public uint SetShaderLocalMemoryThrottledC;
+        public int SetShaderLocalMemoryThrottledCMaxSmCount => (int)((SetShaderLocalMemoryThrottledC >> 0) & 0x1FF);
+        public fixed uint Reserved2FC[5];
+        public uint SetSpaVersion;
+        public int SetSpaVersionMinor => (int)((SetSpaVersion >> 0) & 0xFF);
+        public int SetSpaVersionMajor => (int)((SetSpaVersion >> 8) & 0xFF);
+        public fixed uint Reserved314[123];
+        public uint SetFalcon00;
+        public uint SetFalcon01;
+        public uint SetFalcon02;
+        public uint SetFalcon03;
+        public uint SetFalcon04;
+        public uint SetFalcon05;
+        public uint SetFalcon06;
+        public uint SetFalcon07;
+        public uint SetFalcon08;
+        public uint SetFalcon09;
+        public uint SetFalcon10;
+        public uint SetFalcon11;
+        public uint SetFalcon12;
+        public uint SetFalcon13;
+        public uint SetFalcon14;
+        public uint SetFalcon15;
+        public uint SetFalcon16;
+        public uint SetFalcon17;
+        public uint SetFalcon18;
+        public uint SetFalcon19;
+        public uint SetFalcon20;
+        public uint SetFalcon21;
+        public uint SetFalcon22;
+        public uint SetFalcon23;
+        public uint SetFalcon24;
+        public uint SetFalcon25;
+        public uint SetFalcon26;
+        public uint SetFalcon27;
+        public uint SetFalcon28;
+        public uint SetFalcon29;
+        public uint SetFalcon30;
+        public uint SetFalcon31;
+        public fixed uint Reserved580[127];
+        public uint SetShaderLocalMemoryWindow;
+        public fixed uint Reserved780[4];
+        public uint SetShaderLocalMemoryA;
+        public int SetShaderLocalMemoryAAddressUpper => (int)((SetShaderLocalMemoryA >> 0) & 0xFF);
+        public uint SetShaderLocalMemoryB;
+        public fixed uint Reserved798[383];
+        public uint SetShaderCacheControl;
+        public bool SetShaderCacheControlIcachePrefetchEnable => (SetShaderCacheControl & 0x1) != 0;
+        public fixed uint ReservedD98[19];
+        public uint SetSmTimeoutInterval;
+        public int SetSmTimeoutIntervalCounterBit => (int)((SetSmTimeoutInterval >> 0) & 0x3F);
+        public fixed uint ReservedDE8[87];
+        public uint SetSpareNoop12;
+        public uint SetSpareNoop13;
+        public uint SetSpareNoop14;
+        public uint SetSpareNoop15;
+        public fixed uint ReservedF54[59];
+        public uint SetSpareNoop00;
+        public uint SetSpareNoop01;
+        public uint SetSpareNoop02;
+        public uint SetSpareNoop03;
+        public uint SetSpareNoop04;
+        public uint SetSpareNoop05;
+        public uint SetSpareNoop06;
+        public uint SetSpareNoop07;
+        public uint SetSpareNoop08;
+        public uint SetSpareNoop09;
+        public uint SetSpareNoop10;
+        public uint SetSpareNoop11;
+        public fixed uint Reserved1070[103];
+        public uint InvalidateSamplerCacheAll;
+        public bool InvalidateSamplerCacheAllV => (InvalidateSamplerCacheAll & 0x1) != 0;
+        public uint InvalidateTextureHeaderCacheAll;
+        public bool InvalidateTextureHeaderCacheAllV => (InvalidateTextureHeaderCacheAll & 0x1) != 0;
+        public fixed uint Reserved1214[29];
+        public uint InvalidateTextureDataCacheNoWfi;
+        public InvalidateCacheLines InvalidateTextureDataCacheNoWfiLines => (InvalidateCacheLines)((InvalidateTextureDataCacheNoWfi >> 0) & 0x1);
+        public int InvalidateTextureDataCacheNoWfiTag => (int)((InvalidateTextureDataCacheNoWfi >> 4) & 0x3FFFFF);
+        public fixed uint Reserved128C[7];
+        public uint ActivatePerfSettingsForComputeContext;
+        public bool ActivatePerfSettingsForComputeContextAll => (ActivatePerfSettingsForComputeContext & 0x1) != 0;
+        public fixed uint Reserved12AC[33];
+        public uint InvalidateSamplerCache;
+        public InvalidateCacheLines InvalidateSamplerCacheLines => (InvalidateCacheLines)((InvalidateSamplerCache >> 0) & 0x1);
+        public int InvalidateSamplerCacheTag => (int)((InvalidateSamplerCache >> 4) & 0x3FFFFF);
+        public uint InvalidateTextureHeaderCache;
+        public InvalidateCacheLines InvalidateTextureHeaderCacheLines => (InvalidateCacheLines)((InvalidateTextureHeaderCache >> 0) & 0x1);
+        public int InvalidateTextureHeaderCacheTag => (int)((InvalidateTextureHeaderCache >> 4) & 0x3FFFFF);
+        public uint InvalidateTextureDataCache;
+        public InvalidateCacheLines InvalidateTextureDataCacheLines => (InvalidateCacheLines)((InvalidateTextureDataCache >> 0) & 0x1);
+        public int InvalidateTextureDataCacheTag => (int)((InvalidateTextureDataCache >> 4) & 0x3FFFFF);
+        public fixed uint Reserved133C[58];
+        public uint InvalidateSamplerCacheNoWfi;
+        public InvalidateCacheLines InvalidateSamplerCacheNoWfiLines => (InvalidateCacheLines)((InvalidateSamplerCacheNoWfi >> 0) & 0x1);
+        public int InvalidateSamplerCacheNoWfiTag => (int)((InvalidateSamplerCacheNoWfi >> 4) & 0x3FFFFF);
+        public fixed uint Reserved1428[64];
+        public uint SetShaderExceptions;
+        public bool SetShaderExceptionsEnable => (SetShaderExceptions & 0x1) != 0;
+        public fixed uint Reserved152C[9];
+        public uint SetRenderEnableA;
+        public int SetRenderEnableAOffsetUpper => (int)((SetRenderEnableA >> 0) & 0xFF);
+        public uint SetRenderEnableB;
+        public uint SetRenderEnableC;
+        public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7);
+        public uint SetTexSamplerPoolA;
+        public int SetTexSamplerPoolAOffsetUpper => (int)((SetTexSamplerPoolA >> 0) & 0xFF);
+        public uint SetTexSamplerPoolB;
+        public uint SetTexSamplerPoolC;
+        public int SetTexSamplerPoolCMaximumIndex => (int)((SetTexSamplerPoolC >> 0) & 0xFFFFF);
+        public fixed uint Reserved1568[3];
+        public uint SetTexHeaderPoolA;
+        public int SetTexHeaderPoolAOffsetUpper => (int)((SetTexHeaderPoolA >> 0) & 0xFF);
+        public uint SetTexHeaderPoolB;
+        public uint SetTexHeaderPoolC;
+        public int SetTexHeaderPoolCMaximumIndex => (int)((SetTexHeaderPoolC >> 0) & 0x3FFFFF);
+        public fixed uint Reserved1580[34];
+        public uint SetProgramRegionA;
+        public int SetProgramRegionAAddressUpper => (int)((SetProgramRegionA >> 0) & 0xFF);
+        public uint SetProgramRegionB;
+        public fixed uint Reserved1610[34];
+        public uint InvalidateShaderCachesNoWfi;
+        public bool InvalidateShaderCachesNoWfiInstruction => (InvalidateShaderCachesNoWfi & 0x1) != 0;
+        public bool InvalidateShaderCachesNoWfiGlobalData => (InvalidateShaderCachesNoWfi & 0x10) != 0;
+        public bool InvalidateShaderCachesNoWfiConstant => (InvalidateShaderCachesNoWfi & 0x1000) != 0;
+        public fixed uint Reserved169C[170];
+        public uint SetRenderEnableOverride;
+        public SetRenderEnableOverrideMode SetRenderEnableOverrideMode => (SetRenderEnableOverrideMode)((SetRenderEnableOverride >> 0) & 0x3);
+        public fixed uint Reserved1948[57];
+        public uint PipeNop;
+        public uint SetSpare00;
+        public uint SetSpare01;
+        public uint SetSpare02;
+        public uint SetSpare03;
+        public fixed uint Reserved1A40[48];
+        public uint SetReportSemaphoreA;
+        public int SetReportSemaphoreAOffsetUpper => (int)((SetReportSemaphoreA >> 0) & 0xFF);
+        public uint SetReportSemaphoreB;
+        public uint SetReportSemaphoreC;
+        public uint SetReportSemaphoreD;
+        public SetReportSemaphoreDOperation SetReportSemaphoreDOperation => (SetReportSemaphoreDOperation)((SetReportSemaphoreD >> 0) & 0x3);
+        public bool SetReportSemaphoreDAwakenEnable => (SetReportSemaphoreD & 0x100000) != 0;
+        public SetReportSemaphoreDStructureSize SetReportSemaphoreDStructureSize => (SetReportSemaphoreDStructureSize)((SetReportSemaphoreD >> 28) & 0x1);
+        public bool SetReportSemaphoreDFlushDisable => (SetReportSemaphoreD & 0x4) != 0;
+        public bool SetReportSemaphoreDReductionEnable => (SetReportSemaphoreD & 0x8) != 0;
+        public SetReportSemaphoreDReductionOp SetReportSemaphoreDReductionOp => (SetReportSemaphoreDReductionOp)((SetReportSemaphoreD >> 9) & 0x7);
+        public SetReportSemaphoreDReductionFormat SetReportSemaphoreDReductionFormat => (SetReportSemaphoreDReductionFormat)((SetReportSemaphoreD >> 17) & 0x3);
+        public fixed uint Reserved1B10[702];
+        public uint SetBindlessTexture;
+        public int SetBindlessTextureConstantBufferSlotSelect => (int)((SetBindlessTexture >> 0) & 0x7);
+        public uint SetTrapHandler;
+        public fixed uint Reserved2610[843];
+        public Array8<uint> SetShaderPerformanceCounterValueUpper;
+        public Array8<uint> SetShaderPerformanceCounterValue;
+        public Array8<uint> SetShaderPerformanceCounterEvent;
+        public int SetShaderPerformanceCounterEventEvent(int i) => (int)((SetShaderPerformanceCounterEvent[i] >> 0) & 0xFF);
+        public Array8<uint> SetShaderPerformanceCounterControlA;
+        public int SetShaderPerformanceCounterControlAEvent0(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 0) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect0(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 2) & 0x7);
+        public int SetShaderPerformanceCounterControlAEvent1(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 5) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect1(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 7) & 0x7);
+        public int SetShaderPerformanceCounterControlAEvent2(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 10) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect2(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 12) & 0x7);
+        public int SetShaderPerformanceCounterControlAEvent3(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 15) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect3(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 17) & 0x7);
+        public int SetShaderPerformanceCounterControlAEvent4(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 20) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect4(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 22) & 0x7);
+        public int SetShaderPerformanceCounterControlAEvent5(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 25) & 0x3);
+        public int SetShaderPerformanceCounterControlABitSelect5(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 27) & 0x7);
+        public int SetShaderPerformanceCounterControlASpare(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 30) & 0x3);
+        public Array8<uint> SetShaderPerformanceCounterControlB;
+        public bool SetShaderPerformanceCounterControlBEdge(int i) => (SetShaderPerformanceCounterControlB[i] & 0x1) != 0;
+        public int SetShaderPerformanceCounterControlBMode(int i) => (int)((SetShaderPerformanceCounterControlB[i] >> 1) & 0x3);
+        public bool SetShaderPerformanceCounterControlBWindowed(int i) => (SetShaderPerformanceCounterControlB[i] & 0x8) != 0;
+        public int SetShaderPerformanceCounterControlBFunc(int i) => (int)((SetShaderPerformanceCounterControlB[i] >> 4) & 0xFFFF);
+        public uint SetShaderPerformanceCounterTrapControl;
+        public int SetShaderPerformanceCounterTrapControlMask => (int)((SetShaderPerformanceCounterTrapControl >> 0) & 0xFF);
+        public uint StartShaderPerformanceCounter;
+        public int StartShaderPerformanceCounterCounterMask => (int)((StartShaderPerformanceCounter >> 0) & 0xFF);
+        public uint StopShaderPerformanceCounter;
+        public int StopShaderPerformanceCounterCounterMask => (int)((StopShaderPerformanceCounter >> 0) & 0xFF);
+        public fixed uint Reserved33E8[6];
+        public MmeShadowScratch SetMmeShadowScratch;
+#pragma warning restore CS0649
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs
similarity index 99%
rename from Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs
rename to Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs
index 35418c2d80..a1116074a2 100644
--- a/Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs
@@ -2,7 +2,7 @@ using Ryujinx.Graphics.Gpu.State;
 using System;
 using System.Runtime.CompilerServices;
 
-namespace Ryujinx.Graphics.Gpu.Engine
+namespace Ryujinx.Graphics.Gpu.Engine.Compute
 {
     /// <summary>
     /// Type of the dependent Queue Meta Data.
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs
similarity index 51%
rename from Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs
rename to Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs
index 9064051a27..58fa23266b 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs
@@ -1,14 +1,28 @@
-using Ryujinx.Common;
+using Ryujinx.Common;
+using Ryujinx.Graphics.Device;
 using Ryujinx.Graphics.Gpu.State;
 using Ryujinx.Graphics.Texture;
 using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
 using System.Runtime.Intrinsics;
 
-namespace Ryujinx.Graphics.Gpu.Engine
+namespace Ryujinx.Graphics.Gpu.Engine.Dma
 {
-    partial class Methods
+    /// <summary>
+    /// Represents a DMA copy engine class.
+    /// </summary>
+    class DmaClass : IDeviceState
     {
-        enum CopyFlags
+        private readonly GpuContext _context;
+        private readonly GpuChannel _channel;
+        private readonly DeviceState<DmaClassState> _state;
+
+        /// <summary>
+        /// Copy flags passed on DMA launch.
+        /// </summary>
+        [Flags]
+        private enum CopyFlags
         {
             SrcLinear = 1 << 7,
             DstLinear = 1 << 8,
@@ -16,74 +30,111 @@ namespace Ryujinx.Graphics.Gpu.Engine
             RemapEnable = 1 << 10
         }
 
+        /// <summary>
+        /// Creates a new instance of the DMA copy engine class, wiring the LaunchDma state member name to the LaunchDma method through the callback table given to the device state.
+        /// </summary>
+        /// <param name="context">GPU context (kept in a field for later use by this class)</param>
+        /// <param name="channel">GPU channel (kept in a field for later use by this class)</param>
+        public DmaClass(GpuContext context, GpuChannel channel)
+        {
+            _context = context;
+            _channel = channel;
+            _state = new DeviceState<DmaClassState>(new Dictionary<string, RwCallback> // Callback table keyed by DmaClassState member name.
+            {
+                { nameof(DmaClassState.LaunchDma), new RwCallback(LaunchDma, null) } // NOTE(review): second argument is null; presumably "no read-side callback" - confirm against RwCallback.
+            });
+        }
+
+        /// <summary>
+        /// Reads data from the class registers by forwarding the request to the underlying device state.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <returns>Value returned by the device state for the specified offset</returns>
+        public int Read(int offset) => _state.Read(offset);
+
+        /// <summary>
+        /// Writes data to the class registers by forwarding the request to the underlying device state.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <param name="data">Data to be written (NOTE(review): presumably a write to the LaunchDma register triggers the registered callback - confirm against DeviceState)</param>
+        public void Write(int offset, int data) => _state.Write(offset, data);
+
         /// <summary>
         /// Determine if a buffer-to-texture region covers the entirety of a texture.
         /// </summary>
-        /// <param name="cbp">Copy command parameters</param>
         /// <param name="tex">Texture to compare</param>
         /// <param name="linear">True if the texture is linear, false if block linear</param>
         /// <param name="bpp">Texture bytes per pixel</param>
         /// <param name="stride">Texture stride</param>
+        /// <param name="xCount">Number of pixels to be copied per line; aligned up and compared with the stride in pixels (linear) or the texture width (block linear)</param>
+        /// <param name="yCount">Number of lines to be copied; only compared with the texture height when the texture is block linear</param>
         /// <returns></returns>
-        private bool IsTextureCopyComplete(CopyBufferParams cbp, CopyBufferTexture tex, bool linear, int bpp, int stride)
+        private static bool IsTextureCopyComplete(CopyBufferTexture tex, bool linear, int bpp, int stride, int xCount, int yCount)
         {
             if (linear)
             {
                 int alignWidth = Constants.StrideAlignment / bpp;
                 return tex.RegionX == 0 &&
                        tex.RegionY == 0 &&
-                       stride / bpp == BitUtils.AlignUp(cbp.XCount, alignWidth);
+                       stride / bpp == BitUtils.AlignUp(xCount, alignWidth); // Stride converted to pixels must equal the aligned copy width; yCount is not checked on this path.
             }
             else
             {
                 int alignWidth = Constants.GobAlignment / bpp;
                 return tex.RegionX == 0 &&
                        tex.RegionY == 0 &&
-                       tex.Width == BitUtils.AlignUp(cbp.XCount, alignWidth) &&
-                       tex.Height == cbp.YCount;
+                       tex.Width == BitUtils.AlignUp(xCount, alignWidth) && // Width must equal the copy width aligned to GobAlignment / bpp.
+                       tex.Height == yCount; // Height must match the line count exactly.
             }
         }
 
         /// <summary>
         /// Performs a buffer to buffer, or buffer to texture copy.
         /// </summary>
-        /// <param name="state">Current GPU state</param>
         /// <param name="argument">Method call argument</param>
-        private void CopyBuffer(GpuState state, int argument)
+        private void LaunchDma(int argument)
         {
-            var cbp = state.Get<CopyBufferParams>(MethodOffset.CopyBufferParams);
-
-            var swizzle = state.Get<CopyBufferSwizzle>(MethodOffset.CopyBufferSwizzle);
+            var memoryManager = _channel.MemoryManager;
 
             CopyFlags copyFlags = (CopyFlags)argument;
 
             bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear);
             bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear);
-            bool copy2D    = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
-            bool remap     = copyFlags.HasFlag(CopyFlags.RemapEnable);
+            bool copy2D = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
+            bool remap = copyFlags.HasFlag(CopyFlags.RemapEnable);
 
-            int size = cbp.XCount;
+            uint size = _state.State.LineLengthIn;
 
             if (size == 0)
             {
                 return;
             }
 
-            FlushUboDirty(state.Channel.MemoryManager);
+            ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower;
+            ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower;
+
+            int xCount = (int)_state.State.LineLengthIn;
+            int yCount = (int)_state.State.LineCount;
+
+            _context.Methods.FlushUboDirty(memoryManager);
 
             if (copy2D)
             {
                 // Buffer to texture copy.
-                int srcBpp = remap ? swizzle.UnpackSrcComponentsCount() * swizzle.UnpackComponentSize() : 1;
-                int dstBpp = remap ? swizzle.UnpackDstComponentsCount() * swizzle.UnpackComponentSize() : 1;
+                int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1;
+                int srcBpp = remap ? ((int)_state.State.SetRemapComponentsNumSrcComponents + 1) * componentSize : 1;
+                int dstBpp = remap ? ((int)_state.State.SetRemapComponentsNumDstComponents + 1) * componentSize : 1;
 
-                var dst = state.Get<CopyBufferTexture>(MethodOffset.CopyBufferDstTexture);
-                var src = state.Get<CopyBufferTexture>(MethodOffset.CopyBufferSrcTexture);
+                var dst = Unsafe.As<uint, CopyBufferTexture>(ref _state.State.SetDstBlockSize);
+                var src = Unsafe.As<uint, CopyBufferTexture>(ref _state.State.SetSrcBlockSize);
+
+                int srcStride = (int)_state.State.PitchIn;
+                int dstStride = (int)_state.State.PitchOut;
 
                 var srcCalculator = new OffsetCalculator(
                     src.Width,
                     src.Height,
-                    cbp.SrcStride,
+                    srcStride,
                     srcLinear,
                     src.MemoryLayout.UnpackGobBlocksInY(),
                     src.MemoryLayout.UnpackGobBlocksInZ(),
@@ -92,31 +143,34 @@ namespace Ryujinx.Graphics.Gpu.Engine
                 var dstCalculator = new OffsetCalculator(
                     dst.Width,
                     dst.Height,
-                    cbp.DstStride,
+                    dstStride,
                     dstLinear,
                     dst.MemoryLayout.UnpackGobBlocksInY(),
                     dst.MemoryLayout.UnpackGobBlocksInZ(),
                     dstBpp);
 
-                ulong srcBaseAddress = state.Channel.MemoryManager.Translate(cbp.SrcAddress.Pack());
-                ulong dstBaseAddress = state.Channel.MemoryManager.Translate(cbp.DstAddress.Pack());
+                ulong srcBaseAddress = memoryManager.Translate(srcGpuVa);
+                ulong dstBaseAddress = memoryManager.Translate(dstGpuVa);
 
-                (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, cbp.XCount, cbp.YCount);
-                (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, cbp.XCount, cbp.YCount);
+                (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, xCount, yCount);
+                (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, xCount, yCount);
 
-                ReadOnlySpan<byte> srcSpan = state.Channel.MemoryManager.Physical.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize, true);
-                Span<byte> dstSpan         = state.Channel.MemoryManager.Physical.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray();
+                ReadOnlySpan<byte> srcSpan = memoryManager.Physical.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize, true);
+                Span<byte> dstSpan = memoryManager.Physical.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray();
 
-                bool completeSource = IsTextureCopyComplete(cbp, src, srcLinear, srcBpp, cbp.SrcStride);
-                bool completeDest   = IsTextureCopyComplete(cbp, dst, dstLinear, dstBpp, cbp.DstStride);
+                bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
+                bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);
 
                 if (completeSource && completeDest)
                 {
-                    Image.Texture target = state.Channel.MemoryManager.Physical.TextureCache.FindTexture(
-                        state.Channel.MemoryManager,
+                    var target = memoryManager.Physical.TextureCache.FindTexture(
+                        memoryManager,
                         dst,
-                        cbp,
-                        swizzle,
+                        dstGpuVa,
+                        dstBpp,
+                        dstStride,
+                        xCount,
+                        yCount,
                         dstLinear);
 
                     if (target != null)
@@ -129,7 +183,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
                                 target.Info.Height,
                                 1,
                                 1,
-                                cbp.SrcStride,
+                                srcStride,
                                 target.Info.FormatInfo.BytesPerPixel,
                                 srcSpan);
                         }
@@ -160,7 +214,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
                     {
                         srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.
 
-                        state.Channel.MemoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
+                        memoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
 
                         return;
                     }
@@ -173,12 +227,12 @@ namespace Ryujinx.Graphics.Gpu.Engine
                         byte* dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
                         byte* srcBase = srcPtr - srcBaseOffset;
 
-                        for (int y = 0; y < cbp.YCount; y++)
+                        for (int y = 0; y < yCount; y++)
                         {
                             srcCalculator.SetY(src.RegionY + y);
                             dstCalculator.SetY(dst.RegionY + y);
 
-                            for (int x = 0; x < cbp.XCount; x++)
+                            for (int x = 0; x < xCount; x++)
                             {
                                 int srcOffset = srcCalculator.GetOffset(src.RegionX + x);
                                 int dstOffset = dstCalculator.GetOffset(dst.RegionX + x);
@@ -201,37 +255,29 @@ namespace Ryujinx.Graphics.Gpu.Engine
                     _ => throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format.")
                 };
 
-                state.Channel.MemoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
+                memoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
             }
             else
             {
                 if (remap &&
-                    swizzle.UnpackDstX() == BufferSwizzleComponent.ConstA &&
-                    swizzle.UnpackDstY() == BufferSwizzleComponent.ConstA &&
-                    swizzle.UnpackDstZ() == BufferSwizzleComponent.ConstA &&
-                    swizzle.UnpackDstW() == BufferSwizzleComponent.ConstA &&
-                    swizzle.UnpackSrcComponentsCount() == 1 &&
-                    swizzle.UnpackDstComponentsCount() == 1 &&
-                    swizzle.UnpackComponentSize() == 4)
+                    _state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA &&
+                    _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA &&
+                    _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA &&
+                    _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA &&
+                    _state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One &&
+                    _state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One &&
+                    _state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four)
                 {
                     // Fast path for clears when remap is enabled.
-                    state.Channel.MemoryManager.Physical.BufferCache.ClearBuffer(
-                        state.Channel.MemoryManager,
-                        cbp.DstAddress,
-                        (uint)size * 4,
-                        state.Get<uint>(MethodOffset.CopyBufferConstA));
+                    memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA);
                 }
                 else
                 {
                     // TODO: Implement remap functionality.
                     // Buffer to buffer copy.
-                    state.Channel.MemoryManager.Physical.BufferCache.CopyBuffer(
-                        state.Channel.MemoryManager,
-                        cbp.SrcAddress,
-                        cbp.DstAddress,
-                        (uint)size);
+                    memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size);
                 }
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs
new file mode 100644
index 0000000000..7de4d5f08d
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClassState.cs
@@ -0,0 +1,271 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.Dma
+{
+    /// <summary>
+    /// Physical mode target, decoded from bits 1:0 of both SetSrcPhysMode and SetDstPhysMode.
+    /// </summary>
+    enum SetPhysModeTarget
+    {
+        LocalFb = 0,
+        CoherentSysmem = 1,
+        NoncoherentSysmem = 2,
+    }
+
+    /// <summary>
+    /// DMA data transfer type.
+    /// </summary>
+    enum LaunchDmaDataTransferType
+    {
+        None = 0,
+        Pipelined = 1,
+        NonPipelined = 2,
+    }
+
+    /// <summary>
+    /// DMA semaphore type.
+    /// </summary>
+    enum LaunchDmaSemaphoreType
+    {
+        None = 0,
+        ReleaseOneWordSemaphore = 1,
+        ReleaseFourWordSemaphore = 2,
+    }
+
+    /// <summary>
+    /// DMA interrupt type.
+    /// </summary>
+    enum LaunchDmaInterruptType
+    {
+        None = 0,
+        Blocking = 1,
+        NonBlocking = 2,
+    }
+
+    /// <summary>
+    /// DMA memory layout, shared by the source (LaunchDma bit 7) and destination (LaunchDma bit 8) layout fields.
+    /// </summary>
+    enum LaunchDmaMemoryLayout
+    {
+        Blocklinear = 0,
+        Pitch = 1,
+    }
+
+    /// <summary>
+    /// DMA source/destination type (virtual or physical), decoded from LaunchDma bits 12 (source) and 13 (destination).
+    /// </summary>
+    enum LaunchDmaType
+    {
+        Virtual = 0,
+        Physical = 1,
+    }
+
+    /// <summary>
+    /// DMA semaphore reduction operation.
+    /// </summary>
+    enum LaunchDmaSemaphoreReduction
+    {
+        Imin = 0,
+        Imax = 1,
+        Ixor = 2,
+        Iand = 3,
+        Ior = 4,
+        Iadd = 5,
+        Inc = 6,
+        Dec = 7,
+        Fadd = 10,
+    }
+
+    /// <summary>
+    /// DMA semaphore reduction signedness.
+    /// </summary>
+    enum LaunchDmaSemaphoreReductionSign
+    {
+        Signed = 0,
+        Unsigned = 1,
+    }
+
+    /// <summary>
+    /// DMA L2 cache bypass.
+    /// </summary>
+    enum LaunchDmaBypassL2
+    {
+        UsePteSetting = 0,
+        ForceVolatile = 1,
+    }
+
+    /// <summary>
+    /// Value selected for a destination component (DstX..DstW) by DMA component remapping: a source component, a constant, or no write.
+    /// </summary>
+    enum SetRemapComponentsDst
+    {
+        SrcX = 0,
+        SrcY = 1,
+        SrcZ = 2,
+        SrcW = 3,
+        ConstA = 4,
+        ConstB = 5,
+        NoWrite = 6,
+    }
+
+    /// <summary>
+    /// DMA component remapping component size.
+    /// </summary>
+    enum SetRemapComponentsComponentSize
+    {
+        One = 0,
+        Two = 1,
+        Three = 2,
+        Four = 3,
+    }
+
+    /// <summary>
+    /// DMA component remapping number of components.
+    /// </summary>
+    enum SetRemapComponentsNumComponents
+    {
+        One = 0,
+        Two = 1,
+        Three = 2,
+        Four = 3,
+    }
+
+    /// <summary>
+    /// Width in GOBs of the source or destination texture (decodes both SetSrcBlockSize and SetDstBlockSize).
+    /// </summary>
+    enum SetBlockSizeWidth
+    {
+        QuarterGob = 14,
+        OneGob = 0,
+    }
+
+    /// <summary>
+    /// Height in GOBs of the source or destination texture (decodes both SetSrcBlockSize and SetDstBlockSize).
+    /// </summary>
+    enum SetBlockSizeHeight
+    {
+        OneGob = 0,
+        TwoGobs = 1,
+        FourGobs = 2,
+        EightGobs = 3,
+        SixteenGobs = 4,
+        ThirtytwoGobs = 5,
+    }
+
+    /// <summary>
+    /// Depth in GOBs of the source or destination texture (decodes both SetSrcBlockSize and SetDstBlockSize).
+    /// </summary>
+    enum SetBlockSizeDepth
+    {
+        OneGob = 0,
+        TwoGobs = 1,
+        FourGobs = 2,
+        EightGobs = 3,
+        SixteenGobs = 4,
+        ThirtytwoGobs = 5,
+    }
+
+    /// <summary>
+    /// Height of a single GOB in lines.
+    /// </summary>
+    enum SetBlockSizeGobHeight
+    {
+        GobHeightTesla4 = 0,
+        GobHeightFermi8 = 1,
+    }
+
+    /// <summary>
+    /// DMA copy class state.
+    /// </summary>
+    unsafe struct DmaClassState
+    {
+#pragma warning disable CS0649
+        public fixed uint Reserved00[64];
+        public uint Nop;
+        public fixed uint Reserved104[15];
+        public uint PmTrigger;
+        public fixed uint Reserved144[63];
+        public uint SetSemaphoreA;
+        public int SetSemaphoreAUpper => (int)((SetSemaphoreA >> 0) & 0xFF);
+        public uint SetSemaphoreB;
+        public uint SetSemaphorePayload;
+        public fixed uint Reserved24C[2];
+        public uint SetRenderEnableA;
+        public int SetRenderEnableAUpper => (int)((SetRenderEnableA >> 0) & 0xFF);
+        public uint SetRenderEnableB;
+        public uint SetRenderEnableC;
+        public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7);
+        public uint SetSrcPhysMode;
+        public SetPhysModeTarget SetSrcPhysModeTarget => (SetPhysModeTarget)((SetSrcPhysMode >> 0) & 0x3);
+        public uint SetDstPhysMode;
+        public SetPhysModeTarget SetDstPhysModeTarget => (SetPhysModeTarget)((SetDstPhysMode >> 0) & 0x3);
+        public fixed uint Reserved268[38];
+        public uint LaunchDma;
+        public LaunchDmaDataTransferType LaunchDmaDataTransferType => (LaunchDmaDataTransferType)((LaunchDma >> 0) & 0x3);
+        public bool LaunchDmaFlushEnable => (LaunchDma & 0x4) != 0;
+        public LaunchDmaSemaphoreType LaunchDmaSemaphoreType => (LaunchDmaSemaphoreType)((LaunchDma >> 3) & 0x3);
+        public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 5) & 0x3);
+        public LaunchDmaMemoryLayout LaunchDmaSrcMemoryLayout => (LaunchDmaMemoryLayout)((LaunchDma >> 7) & 0x1);
+        public LaunchDmaMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaMemoryLayout)((LaunchDma >> 8) & 0x1);
+        public bool LaunchDmaMultiLineEnable => (LaunchDma & 0x200) != 0;
+        public bool LaunchDmaRemapEnable => (LaunchDma & 0x400) != 0;
+        public bool LaunchDmaForceRmwdisable => (LaunchDma & 0x800) != 0;
+        public LaunchDmaType LaunchDmaSrcType => (LaunchDmaType)((LaunchDma >> 12) & 0x1);
+        public LaunchDmaType LaunchDmaDstType => (LaunchDmaType)((LaunchDma >> 13) & 0x1);
+        public LaunchDmaSemaphoreReduction LaunchDmaSemaphoreReduction => (LaunchDmaSemaphoreReduction)((LaunchDma >> 14) & 0xF);
+        public LaunchDmaSemaphoreReductionSign LaunchDmaSemaphoreReductionSign => (LaunchDmaSemaphoreReductionSign)((LaunchDma >> 18) & 0x1);
+        public bool LaunchDmaSemaphoreReductionEnable => (LaunchDma & 0x80000) != 0;
+        public LaunchDmaBypassL2 LaunchDmaBypassL2 => (LaunchDmaBypassL2)((LaunchDma >> 20) & 0x1);
+        public fixed uint Reserved304[63];
+        public uint OffsetInUpper;
+        public int OffsetInUpperUpper => (int)((OffsetInUpper >> 0) & 0xFF);
+        public uint OffsetInLower;
+        public uint OffsetOutUpper;
+        public int OffsetOutUpperUpper => (int)((OffsetOutUpper >> 0) & 0xFF);
+        public uint OffsetOutLower;
+        public uint PitchIn;
+        public uint PitchOut;
+        public uint LineLengthIn;
+        public uint LineCount;
+        public fixed uint Reserved420[184];
+        public uint SetRemapConstA;
+        public uint SetRemapConstB;
+        public uint SetRemapComponents;
+        public SetRemapComponentsDst SetRemapComponentsDstX => (SetRemapComponentsDst)((SetRemapComponents >> 0) & 0x7);
+        public SetRemapComponentsDst SetRemapComponentsDstY => (SetRemapComponentsDst)((SetRemapComponents >> 4) & 0x7);
+        public SetRemapComponentsDst SetRemapComponentsDstZ => (SetRemapComponentsDst)((SetRemapComponents >> 8) & 0x7);
+        public SetRemapComponentsDst SetRemapComponentsDstW => (SetRemapComponentsDst)((SetRemapComponents >> 12) & 0x7);
+        public SetRemapComponentsComponentSize SetRemapComponentsComponentSize => (SetRemapComponentsComponentSize)((SetRemapComponents >> 16) & 0x3);
+        public SetRemapComponentsNumComponents SetRemapComponentsNumSrcComponents => (SetRemapComponentsNumComponents)((SetRemapComponents >> 20) & 0x3);
+        public SetRemapComponentsNumComponents SetRemapComponentsNumDstComponents => (SetRemapComponentsNumComponents)((SetRemapComponents >> 24) & 0x3);
+        public uint SetDstBlockSize;
+        public SetBlockSizeWidth SetDstBlockSizeWidth => (SetBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
+        public SetBlockSizeHeight SetDstBlockSizeHeight => (SetBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+        public SetBlockSizeDepth SetDstBlockSizeDepth => (SetBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+        public SetBlockSizeGobHeight SetDstBlockSizeGobHeight => (SetBlockSizeGobHeight)((SetDstBlockSize >> 12) & 0xF);
+        public uint SetDstWidth;
+        public uint SetDstHeight;
+        public uint SetDstDepth;
+        public uint SetDstLayer;
+        public uint SetDstOrigin;
+        public int SetDstOriginX => (int)((SetDstOrigin >> 0) & 0xFFFF);
+        public int SetDstOriginY => (int)((SetDstOrigin >> 16) & 0xFFFF);
+        public uint Reserved724;
+        public uint SetSrcBlockSize;
+        public SetBlockSizeWidth SetSrcBlockSizeWidth => (SetBlockSizeWidth)((SetSrcBlockSize >> 0) & 0xF);
+        public SetBlockSizeHeight SetSrcBlockSizeHeight => (SetBlockSizeHeight)((SetSrcBlockSize >> 4) & 0xF);
+        public SetBlockSizeDepth SetSrcBlockSizeDepth => (SetBlockSizeDepth)((SetSrcBlockSize >> 8) & 0xF);
+        public SetBlockSizeGobHeight SetSrcBlockSizeGobHeight => (SetBlockSizeGobHeight)((SetSrcBlockSize >> 12) & 0xF);
+        public uint SetSrcWidth;
+        public uint SetSrcHeight;
+        public uint SetSrcDepth;
+        public uint SetSrcLayer;
+        public uint SetSrcOrigin;
+        public int SetSrcOriginX => (int)((SetSrcOrigin >> 0) & 0xFFFF);
+        public int SetSrcOriginY => (int)((SetSrcOrigin >> 16) & 0xFFFF);
+        public fixed uint Reserved740[629];
+        public uint PmTriggerEnd;
+        public fixed uint Reserved1118[2490];
+#pragma warning restore CS0649
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs
index 28dd15bd08..07d062eb62 100644
--- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClassState.cs
@@ -4,6 +4,9 @@ using Ryujinx.Common.Memory;
 
 namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
 {
+    /// <summary>
+    /// Semaphore operation.
+    /// </summary>
     enum SemaphoredOperation
     {
         Acquire = 1,
@@ -13,24 +16,36 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         Reduction = 16
     }
 
+    /// <summary>
+    /// Semaphore acquire switch enable.
+    /// </summary>
     enum SemaphoredAcquireSwitch
     {
         Disabled = 0,
         Enabled = 1
     }
 
+    /// <summary>
+    /// Semaphore release interrupt wait enable.
+    /// </summary>
     enum SemaphoredReleaseWfi
     {
         En = 0,
         Dis = 1
     }
 
+    /// <summary>
+    /// Semaphore release structure size.
+    /// </summary>
     enum SemaphoredReleaseSize
     {
         SixteenBytes = 0,
         FourBytes = 1
     }
 
+    /// <summary>
+    /// Semaphore reduction operation.
+    /// </summary>
     enum SemaphoredReduction
     {
         Min = 0,
@@ -43,24 +58,36 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         Dec = 7
     }
 
+    /// <summary>
+    /// Semaphore format.
+    /// </summary>
     enum SemaphoredFormat
     {
         Signed = 0,
         Unsigned = 1
     }
 
+    /// <summary>
+    /// Memory operation TLB invalidation page directory base (PDB) selection.
+    /// </summary>
     enum MemOpCTlbInvalidatePdb
     {
         One = 0,
         All = 1
     }
 
+    /// <summary>
+    /// Memory Translation Lookaside Buffer GPC invalidation enable.
+    /// </summary>
     enum MemOpCTlbInvalidateGpc
     {
         Enable = 0,
         Disable = 1
     }
 
+    /// <summary>
+    /// Memory Translation Lookaside Buffer invalidation target.
+    /// </summary>
     enum MemOpCTlbInvalidateTarget
     {
         VidMem = 0,
@@ -68,6 +95,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         SysMemNoncoherent = 3
     }
 
+    /// <summary>
+    /// Memory operation.
+    /// </summary>
     enum MemOpDOperation
     {
         Membar = 5,
@@ -78,24 +108,36 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         L2FlushDirty = 16
     }
 
+    /// <summary>
+    /// Syncpoint operation.
+    /// </summary>
     enum SyncpointbOperation
     {
         Wait = 0,
         Incr = 1
     }
 
+    /// <summary>
+    /// Syncpoint wait switch enable.
+    /// </summary>
     enum SyncpointbWaitSwitch
     {
         Dis = 0,
         En = 1
     }
 
+    /// <summary>
+    /// Wait-for-idle (WFI) scope.
+    /// </summary>
     enum WfiScope
     {
         CurrentScgType = 0,
         All = 1
     }
 
+    /// <summary>
+    /// Yield operation.
+    /// </summary>
     enum YieldOp
     {
         Nop = 0,
@@ -104,6 +146,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         Tsg = 3
     }
 
+    /// <summary>
+    /// General Purpose FIFO class state.
+    /// </summary>
     struct GPFifoClassState
     {
 #pragma warning disable CS0649
diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
index c683d1790b..d12b681024 100644
--- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
@@ -1,4 +1,9 @@
-using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.Compute;
+using Ryujinx.Graphics.Gpu.Engine.Dma;
+using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
+using Ryujinx.Graphics.Gpu.Engine.Twod;
+using Ryujinx.Graphics.Gpu.Memory;
 using Ryujinx.Graphics.Gpu.State;
 using System;
 using System.Runtime.CompilerServices;
@@ -33,6 +38,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         private DmaState _state;
 
         private readonly GpuState[] _subChannels;
+        private readonly IDeviceState[] _subChannels2;
         private readonly GPFifoClass _fifoClass;
 
         /// <summary>
@@ -47,10 +53,21 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
 
             _fifoClass = new GPFifoClass(context, this);
             _subChannels = new GpuState[8];
+            _subChannels2 = new IDeviceState[8]
+            {
+                null,
+                new ComputeClass(context, channel),
+                new InlineToMemoryClass(context, channel),
+                new TwodClass(channel),
+                new DmaClass(context, channel),
+                null,
+                null,
+                null
+            };
 
             for (int index = 0; index < _subChannels.Length; index++)
             {
-                _subChannels[index] = new GpuState(channel);
+                _subChannels[index] = new GpuState(channel, _subChannels2[index]);
 
                 _context.Methods.RegisterCallbacks(_subChannels[index]);
             }
diff --git a/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
new file mode 100644
index 0000000000..0e7d6fb076
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
@@ -0,0 +1,211 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Texture;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
+{
+    /// <summary>
+    /// Represents an Inline-to-Memory engine class.
+    /// </summary>
+    class InlineToMemoryClass : IDeviceState
+    {
+        private readonly GpuContext _context;
+        private readonly GpuChannel _channel;
+        private readonly DeviceState<InlineToMemoryClassState> _state;
+
+        private bool _isLinear;
+
+        private int _offset;
+        private int _size;
+
+        private ulong _dstGpuVa;
+        private int _dstX;
+        private int _dstY;
+        private int _dstWidth;
+        private int _dstHeight;
+        private int _dstStride;
+        private int _dstGobBlocksInY;
+        private int _lineLengthIn;
+        private int _lineCount;
+
+        private bool _finished;
+
+        private int[] _buffer;
+
+        /// <summary>
+        /// Creates a new instance of the Inline-to-Memory engine class.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="initializeState">Indicates if the internal state should be initialized. Set to false if part of another engine</param>
+        protected InlineToMemoryClass(GpuContext context, GpuChannel channel, bool initializeState)
+        {
+            _context = context;
+            _channel = channel;
+
+            if (initializeState)
+            {
+                _state = new DeviceState<InlineToMemoryClassState>(new Dictionary<string, RwCallback>
+                {
+                    { nameof(InlineToMemoryClassState.LaunchDma), new RwCallback(LaunchDma, null) },
+                    { nameof(InlineToMemoryClassState.LoadInlineData), new RwCallback(LoadInlineData, null) }
+                });
+            }
+        }
+
+        /// <summary>
+        /// Creates a new instance of the inline-to-memory engine class.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="channel">GPU channel</param>
+        public InlineToMemoryClass(GpuContext context, GpuChannel channel) : this(context, channel, true)
+        {
+        }
+
+        /// <summary>
+        /// Reads data from the class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <returns>Data at the specified offset</returns>
+        public virtual int Read(int offset) => _state.Read(offset);
+
+        /// <summary>
+        /// Writes data to the class registers.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <param name="data">Data to be written</param>
+        public virtual void Write(int offset, int data) => _state.Write(offset, data);
+
+        /// <summary>
+        /// Launches Inline-to-Memory engine DMA copy.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        protected virtual void LaunchDma(int argument)
+        {
+            LaunchDma(ref _state.State, argument);
+        }
+
+        /// <summary>
+        /// Begins an Inline-to-Memory transfer: latches the destination parameters and sizes the staging buffer; the data is written once LoadInlineData has received every word.
+        /// </summary>
+        /// <param name="state">Current class state, read for the transfer size and destination registers</param>
+        /// <param name="argument">Method call argument; bit 0 set selects a linear destination</param>
+        protected void LaunchDma(ref InlineToMemoryClassState state, int argument)
+        {
+            _isLinear = (argument & 1) != 0;
+
+            _offset = 0;
+            _size = (int)(state.LineLengthIn * state.LineCount); // Total number of bytes expected for this transfer.
+
+            int count = BitUtils.DivRoundUp(_size, 4); // Number of 32-bit words needed to hold _size bytes.
+
+            if (_buffer == null || _buffer.Length < count)
+            {
+                _buffer = new int[count];
+            }
+
+            ulong dstGpuVa = ((ulong)state.OffsetOutUpperValue << 32) | state.OffsetOut;
+
+            ulong dstBaseAddress = _channel.MemoryManager.Translate(dstGpuVa);
+
+            // Trigger read tracking, to flush any managed resources in the destination region.
+            _channel.MemoryManager.Physical.GetSpan(dstBaseAddress, _size, true);
+
+            _dstGpuVa = dstGpuVa;
+            _dstX = state.SetDstOriginBytesXV;
+            _dstY = state.SetDstOriginSamplesYV;
+            _dstWidth = (int)state.SetDstWidth;
+            _dstHeight = (int)state.SetDstHeight;
+            _dstStride = (int)state.PitchOut;
+            _dstGobBlocksInY = 1 << (int)state.SetDstBlockSizeHeight; // The register field holds log2 of the height in GOBs.
+            _lineLengthIn = (int)state.LineLengthIn;
+            _lineCount = (int)state.LineCount;
+
+            _finished = false;
+        }
+
+        /// <summary>
+        /// Pushes a word of data to the Inline-to-Memory engine; ignored when no transfer is pending (before the first LaunchDma, or after completion) or the staging buffer has no room.
+        /// </summary>
+        /// <param name="argument">Method call argument (the 32-bit data word to append)</param>
+        protected void LoadInlineData(int argument)
+        {
+            if (!_finished && _buffer != null && _offset < _buffer.Length)
+            {
+                _buffer[_offset++] = argument;
+
+                if (_offset * 4 >= _size)
+                {
+                    FinishTransfer();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Writes the buffered inline data to the destination memory, marks the transfer as finished and calls AdvanceSequence on the context.
+        /// </summary>
+        private void FinishTransfer()
+        {
+            Span<byte> data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);
+
+            if (_isLinear && _lineCount == 1) // Single linear line: one contiguous write.
+            {
+                ulong address = _channel.MemoryManager.Translate(_dstGpuVa);
+
+                _channel.MemoryManager.Physical.Write(address, data);
+            }
+            else
+            {
+                var dstCalculator = new OffsetCalculator(
+                    _dstWidth,
+                    _dstHeight,
+                    _dstStride,
+                    _isLinear,
+                    _dstGobBlocksInY,
+                    1); // NOTE(review): presumably bytes per pixel, since X below is a byte coordinate - confirm against OffsetCalculator.
+
+                int srcOffset = 0;
+
+                ulong dstBaseAddress = _channel.MemoryManager.Translate(_dstGpuVa);
+
+                for (int y = _dstY; y < _dstY + _lineCount; y++)
+                {
+                    int x1 = _dstX;
+                    int x2 = _dstX + _lineLengthIn;
+                    int x2Trunc = _dstX + BitUtils.AlignDown(_lineLengthIn, 16);
+
+                    int x;
+
+                    for (x = x1; x < x2Trunc; x += 16, srcOffset += 16) // Bulk of the line, 16 bytes per write.
+                    {
+                        int dstOffset = dstCalculator.GetOffset(x, y);
+
+                        ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
+
+                        Span<byte> pixel = data.Slice(srcOffset, 16);
+
+                        _channel.MemoryManager.Physical.Write(dstAddress, pixel);
+                    }
+
+                    for (; x < x2; x++, srcOffset++) // Remaining tail of the line, one byte per write.
+                    {
+                        int dstOffset = dstCalculator.GetOffset(x, y);
+
+                        ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
+
+                        Span<byte> pixel = data.Slice(srcOffset, 1);
+
+                        _channel.MemoryManager.Physical.Write(dstAddress, pixel);
+                    }
+                }
+            }
+
+            _finished = true; // LoadInlineData ignores further words until LaunchDma clears this again.
+
+            _context.AdvanceSequence();
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs
new file mode 100644
index 0000000000..d0c82a5e47
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs
@@ -0,0 +1,181 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
+{
+    /// <summary>
+    /// Notify type.
+    /// </summary>
+    enum NotifyType
+    {
+        WriteOnly = 0,
+        WriteThenAwaken = 1,
+    }
+
+    /// <summary>
+    /// Width in GOBs of the destination texture.
+    /// </summary>
+    enum SetDstBlockSizeWidth
+    {
+        OneGob = 0,
+    }
+
+    /// <summary>
+    /// Height in GOBs of the destination texture.
+    /// </summary>
+    enum SetDstBlockSizeHeight
+    {
+        OneGob = 0,
+        TwoGobs = 1,
+        FourGobs = 2,
+        EightGobs = 3,
+        SixteenGobs = 4,
+        ThirtytwoGobs = 5,
+    }
+
+    /// <summary>
+    /// Depth in GOBs of the destination texture.
+    /// </summary>
+    enum SetDstBlockSizeDepth
+    {
+        OneGob = 0,
+        TwoGobs = 1,
+        FourGobs = 2,
+        EightGobs = 3,
+        SixteenGobs = 4,
+        ThirtytwoGobs = 5,
+    }
+
+    /// <summary>
+    /// Memory layout of the destination texture.
+    /// </summary>
+    enum LaunchDmaDstMemoryLayout
+    {
+        Blocklinear = 0,
+        Pitch = 1,
+    }
+
+    /// <summary>
+    /// DMA completion type.
+    /// </summary>
+    enum LaunchDmaCompletionType
+    {
+        FlushDisable = 0,
+        FlushOnly = 1,
+        ReleaseSemaphore = 2,
+    }
+
+    /// <summary>
+    /// DMA interrupt type.
+    /// </summary>
+    enum LaunchDmaInterruptType
+    {
+        None = 0,
+        Interrupt = 1,
+    }
+
+    /// <summary>
+    /// DMA semaphore structure size.
+    /// </summary>
+    enum LaunchDmaSemaphoreStructSize
+    {
+        FourWords = 0,
+        OneWord = 1,
+    }
+
+    /// <summary>
+    /// DMA semaphore reduction operation.
+    /// </summary>
+    enum LaunchDmaReductionOp
+    {
+        RedAdd = 0,
+        RedMin = 1,
+        RedMax = 2,
+        RedInc = 3,
+        RedDec = 4,
+        RedAnd = 5,
+        RedOr = 6,
+        RedXor = 7,
+    }
+
+    /// <summary>
+    /// DMA semaphore reduction format.
+    /// </summary>
+    enum LaunchDmaReductionFormat
+    {
+        Unsigned32 = 0,
+        Signed32 = 1,
+    }
+
+    /// <summary>
+    /// Inline-to-Memory class state.
+    /// </summary>
+    unsafe struct InlineToMemoryClassState
+    {
+#pragma warning disable CS0649
+        public uint SetObject;
+        public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF);
+        public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F);
+        public fixed uint Reserved04[63];
+        public uint NoOperation;
+        public uint SetNotifyA;
+        public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF);
+        public uint SetNotifyB;
+        public uint Notify;
+        public NotifyType NotifyType => (NotifyType)(Notify);
+        public uint WaitForIdle;
+        public fixed uint Reserved114[7];
+        public uint SetGlobalRenderEnableA;
+        public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF);
+        public uint SetGlobalRenderEnableB;
+        public uint SetGlobalRenderEnableC;
+        public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7);
+        public uint SendGoIdle;
+        public uint PmTrigger;
+        public uint PmTriggerWfi;
+        public fixed uint Reserved148[2];
+        public uint SetInstrumentationMethodHeader;
+        public uint SetInstrumentationMethodData;
+        public fixed uint Reserved158[10];
+        public uint LineLengthIn;
+        public uint LineCount;
+        public uint OffsetOutUpper;
+        public int OffsetOutUpperValue => (int)((OffsetOutUpper >> 0) & 0xFF);
+        public uint OffsetOut;
+        public uint PitchOut;
+        public uint SetDstBlockSize;
+        public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
+        public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
+        public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
+        public uint SetDstWidth;
+        public uint SetDstHeight;
+        public uint SetDstDepth;
+        public uint SetDstLayer;
+        public uint SetDstOriginBytesX;
+        public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF);
+        public uint SetDstOriginSamplesY;
+        public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY >> 0) & 0xFFFF);
+        public uint LaunchDma;
+        public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1);
+        public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3);
+        public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3);
+        public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1);
+        public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0;
+        public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7);
+        public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3);
+        public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0;
+        public uint LoadInlineData;
+        public fixed uint Reserved1B8[9];
+        public uint SetI2mSemaphoreA;
+        public int SetI2mSemaphoreAOffsetUpper => (int)((SetI2mSemaphoreA >> 0) & 0xFF);
+        public uint SetI2mSemaphoreB;
+        public uint SetI2mSemaphoreC;
+        public fixed uint Reserved1E8[2];
+        public uint SetI2mSpareNoop00;
+        public uint SetI2mSpareNoop01;
+        public uint SetI2mSpareNoop02;
+        public uint SetI2mSpareNoop03;
+        public fixed uint Reserved200[3200];
+        public MmeShadowScratch SetMmeShadowScratch;
+#pragma warning restore CS0649
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs b/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs
index 1343dbe7be..49c8cda445 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs
@@ -17,7 +17,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
         /// Flushes any queued ubo updates.
         /// </summary>
         /// <param name="memoryManager">GPU memory manager where the uniform buffer is mapped</param>
-        private void FlushUboDirty(MemoryManager memoryManager)
+        public void FlushUboDirty(MemoryManager memoryManager)
         {
             if (_ubFollowUpAddress != 0)
             {
diff --git a/Ryujinx.Graphics.Gpu/Engine/Methods.cs b/Ryujinx.Graphics.Gpu/Engine/Methods.cs
index aaac9441d4..756d56d96f 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Methods.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Methods.cs
@@ -50,13 +50,8 @@ namespace Ryujinx.Graphics.Gpu.Engine
             state.RegisterCallback(MethodOffset.LaunchDma,      LaunchDma);
             state.RegisterCallback(MethodOffset.LoadInlineData, LoadInlineData);
 
-            state.RegisterCallback(MethodOffset.Dispatch, Dispatch);
-
             state.RegisterCallback(MethodOffset.SyncpointAction, IncrementSyncpoint);
 
-            state.RegisterCallback(MethodOffset.CopyBuffer,  CopyBuffer);
-            state.RegisterCallback(MethodOffset.CopyTexture, CopyTexture);
-
             state.RegisterCallback(MethodOffset.TextureBarrier,      TextureBarrier);
             state.RegisterCallback(MethodOffset.TextureBarrierTiled, TextureBarrierTiled);
 
@@ -956,24 +951,6 @@ namespace Ryujinx.Graphics.Gpu.Engine
             _context.Renderer.Pipeline.SetLogicOpState(logicOpState.Enable, logicOpState.LogicalOp);
         }
 
-        /// <summary>
-        /// Storage buffer address and size information.
-        /// </summary>
-        private struct SbDescriptor
-        {
-#pragma warning disable CS0649
-            public uint AddressLow;
-            public uint AddressHigh;
-            public int  Size;
-            public int  Padding;
-#pragma warning restore CS0649
-
-            public ulong PackAddress()
-            {
-                return AddressLow | ((ulong)AddressHigh << 32);
-            }
-        }
-
         /// <summary>
         /// Updates host shaders based on the guest GPU state.
         /// </summary>
@@ -1088,6 +1065,14 @@ namespace Ryujinx.Graphics.Gpu.Engine
             _context.Renderer.Pipeline.SetProgram(gs.HostProgram);
         }
 
+        /// <summary>
+        /// Forces the shaders to be rebound on the next draw.
+        /// </summary>
+        public void ForceShaderUpdate()
+        {
+            _forceShaderUpdate = true;
+        }
+
         /// <summary>
         /// Updates transform feedback buffer state based on the guest GPU state.
         /// </summary>
diff --git a/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs b/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs
new file mode 100644
index 0000000000..df9021e068
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/MmeShadowScratch.cs
@@ -0,0 +1,15 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+    [StructLayout(LayoutKind.Sequential, Size = 1024)] // 1024 bytes = room for 256 uint entries
+    struct MmeShadowScratch
+    {
+#pragma warning disable CS0169
+        private uint _e0; // First entry; the rest of the storage comes from the explicit struct Size
+#pragma warning restore CS0169
+        public ref uint this[int index] => ref ToSpan()[index]; // By-ref access to one entry of the 256 entry span
+        public Span<uint> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 256); // Span over 256 uints starting at _e0
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodCopyTexture.cs b/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
similarity index 57%
rename from Ryujinx.Graphics.Gpu/Engine/MethodCopyTexture.cs
rename to Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
index 946d0dd5fc..d4f6d879ea 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodCopyTexture.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
@@ -1,35 +1,67 @@
+using Ryujinx.Graphics.Device;
 using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.Gpu.Image;
 using Ryujinx.Graphics.Gpu.State;
 using Ryujinx.Graphics.Texture;
 using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
 
-namespace Ryujinx.Graphics.Gpu.Engine
+namespace Ryujinx.Graphics.Gpu.Engine.Twod
 {
-    using Texture = Image.Texture;
-
-    partial class Methods
+    /// <summary>
+    /// Represents a 2D engine class.
+    /// </summary>
+    class TwodClass : IDeviceState
     {
+        private readonly GpuChannel _channel;
+        private readonly DeviceState<TwodClassState> _state;
+
         /// <summary>
-        /// Performs a texture to texture copy.
+        /// Creates a new instance of the 2D engine class.
         /// </summary>
-        /// <param name="state">Current GPU state</param>
-        /// <param name="argument">Method call argument</param>
-        private void CopyTexture(GpuState state, int argument)
+        /// <param name="channel">The channel that will make use of the engine</param>
+        public TwodClass(GpuChannel channel)
         {
-            var memoryManager = state.Channel.MemoryManager;
+            _channel = channel;
+            _state = new DeviceState<TwodClassState>(new Dictionary<string, RwCallback>
+            { // Ties the PixelsFromMemorySrcY0Int register to the blit handler; second RwCallback argument is null (presumably the read side)
+                { nameof(TwodClassState.PixelsFromMemorySrcY0Int), new RwCallback(PixelsFromMemorySrcY0Int, null) }
+            });
+        }
 
-            var dstCopyTexture = state.Get<CopyTexture>(MethodOffset.CopyDstTexture);
-            var srcCopyTexture = state.Get<CopyTexture>(MethodOffset.CopySrcTexture);
+        /// <summary>
+        /// Reads data from the class registers by forwarding to the underlying device state.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <returns>Data at the specified offset</returns>
+        public int Read(int offset) => _state.Read(offset);
 
-            var region = state.Get<CopyRegion>(MethodOffset.CopyRegion);
+        /// <summary>
+        /// Writes data to the class registers by forwarding to the underlying device state.
+        /// </summary>
+        /// <param name="offset">Register byte offset</param>
+        /// <param name="data">Data to be written</param>
+        public void Write(int offset, int data) => _state.Write(offset, data);
 
-            var control = state.Get<CopyTextureControl>(MethodOffset.CopyTextureControl);
+        /// <summary>
+        /// Performs the blit operation, triggered by the register write.
+        /// </summary>
+        /// <param name="argument">Method call argument</param>
+        private void PixelsFromMemorySrcY0Int(int argument)
+        {
+            var memoryManager = _channel.MemoryManager;
 
-            bool originCorner = control.UnpackOriginCorner();
+            var dstCopyTexture = Unsafe.As<uint, CopyTexture>(ref _state.State.SetDstFormat);
+            var srcCopyTexture = Unsafe.As<uint, CopyTexture>(ref _state.State.SetSrcFormat);
 
-            long srcX = region.SrcXF;
-            long srcY = region.SrcYF;
+            long srcX = ((long)_state.State.SetPixelsFromMemorySrcX0Int << 32) | (long)(ulong)_state.State.SetPixelsFromMemorySrcX0Frac;
+            long srcY = ((long)_state.State.PixelsFromMemorySrcY0Int << 32) | (long)(ulong)_state.State.SetPixelsFromMemorySrcY0Frac;
+
+            long duDx = ((long)_state.State.SetPixelsFromMemoryDuDxInt << 32) | (long)(ulong)_state.State.SetPixelsFromMemoryDuDxFrac;
+            long dvDy = ((long)_state.State.SetPixelsFromMemoryDvDyInt << 32) | (long)(ulong)_state.State.SetPixelsFromMemoryDvDyFrac;
+
+            bool originCorner = _state.State.SetPixelsFromMemorySampleModeOrigin == SetPixelsFromMemorySampleModeOrigin.Corner;
 
             if (originCorner)
             {
@@ -40,21 +72,21 @@ namespace Ryujinx.Graphics.Gpu.Engine
                 // The offset is calculated as FactorXY / 2.0, where FactorXY = SrcXY / DstXY,
                 // so we do the same here by dividing the fixed point value by 2, while
                 // throwing away the fractional part to avoid rounding errors.
-                srcX -= (region.SrcWidthRF >> 33) << 32;
-                srcY -= (region.SrcHeightRF >> 33) << 32;
+                srcX -= (duDx >> 33) << 32;
+                srcY -= (dvDy >> 33) << 32;
             }
 
             int srcX1 = (int)(srcX >> 32);
             int srcY1 = (int)(srcY >> 32);
 
-            int srcX2 = srcX1 + (int)((region.SrcWidthRF * region.DstWidth + uint.MaxValue) >> 32);
-            int srcY2 = srcY1 + (int)((region.SrcHeightRF * region.DstHeight + uint.MaxValue) >> 32);
+            int srcX2 = srcX1 + (int)((duDx * _state.State.SetPixelsFromMemoryDstWidth + uint.MaxValue) >> 32);
+            int srcY2 = srcY1 + (int)((dvDy * _state.State.SetPixelsFromMemoryDstHeight + uint.MaxValue) >> 32);
 
-            int dstX1 = region.DstX;
-            int dstY1 = region.DstY;
+            int dstX1 = (int)_state.State.SetPixelsFromMemoryDstX0;
+            int dstY1 = (int)_state.State.SetPixelsFromMemoryDstY0;
 
-            int dstX2 = region.DstX + region.DstWidth;
-            int dstY2 = region.DstY + region.DstHeight;
+            int dstX2 = dstX1 + (int)_state.State.SetPixelsFromMemoryDstWidth;
+            int dstY2 = dstY1 + (int)_state.State.SetPixelsFromMemoryDstHeight;
 
             // The source and destination textures should at least be as big as the region being requested.
             // The hints will only resize within alignment constraints, so out of bound copies won't resize in most cases.
@@ -82,7 +114,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
                 srcX1 = 0;
             }
 
-            Texture srcTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
+            var srcTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
                 memoryManager,
                 srcCopyTexture,
                 offset,
@@ -109,7 +141,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
                 dstCopyTextureFormat = dstCopyTexture.Format.Convert();
             }
 
-            Texture dstTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
+            var dstTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
                 memoryManager,
                 dstCopyTexture,
                 0,
@@ -137,11 +169,11 @@ namespace Ryujinx.Graphics.Gpu.Engine
                 (int)Math.Ceiling(dstScale * (dstX2 / dstTexture.Info.SamplesInX)),
                 (int)Math.Ceiling(dstScale * (dstY2 / dstTexture.Info.SamplesInY)));
 
-            bool linearFilter = control.UnpackLinearFilter();
+            bool linearFilter = _state.State.SetPixelsFromMemorySampleModeFilter == SetPixelsFromMemorySampleModeFilter.Bilinear;
 
             srcTexture.HostTexture.CopyTo(dstTexture.HostTexture, srcRegion, dstRegion, linearFilter);
 
             dstTexture.SignalModified();
         }
     }
-}
\ No newline at end of file
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs b/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs
new file mode 100644
index 0000000000..fdc4204db7
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClassState.cs
@@ -0,0 +1,827 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Gpu.Engine.Twod
+{
+    /// <summary>
+    /// Notify type, read from the full value of the Notify register.
+    /// </summary>
+    enum NotifyType
+    {
+        WriteOnly = 0,
+        WriteThenAwaken = 1,
+    }
+
+    /// <summary>
+    /// MME shadow RAM control mode (bits 0-1 of the SetMmeShadowRamControl register).
+    /// </summary>
+    enum SetMmeShadowRamControlMode
+    {
+        MethodTrack = 0,
+        MethodTrackWithFilter = 1,
+        MethodPassthrough = 2,
+        MethodReplay = 3,
+    }
+
+    /// <summary>
+    /// Format of the destination texture (bits 0-7 of the SetDstFormat register).
+    /// </summary>
+    enum SetDstFormatV
+    {
+        A8r8g8b8 = 207,
+        A8rl8gl8bl8 = 208,
+        A2r10g10b10 = 223,
+        A8b8g8r8 = 213,
+        A8bl8gl8rl8 = 214,
+        A2b10g10r10 = 209,
+        X8r8g8b8 = 230,
+        X8rl8gl8bl8 = 231,
+        X8b8g8r8 = 249,
+        X8bl8gl8rl8 = 250,
+        R5g6b5 = 232,
+        A1r5g5b5 = 233,
+        X1r5g5b5 = 248,
+        Y8 = 243,
+        Y16 = 238,
+        Y32 = 255,
+        Z1r5g5b5 = 251,
+        O1r5g5b5 = 252,
+        Z8r8g8b8 = 253,
+        O8r8g8b8 = 254,
+        Y18x8 = 28,
+        Rf16 = 242,
+        Rf32 = 229,
+        Rf32Gf32 = 203,
+        Rf16Gf16Bf16Af16 = 202,
+        Rf16Gf16Bf16X16 = 206,
+        Rf32Gf32Bf32Af32 = 192,
+        Rf32Gf32Bf32X32 = 195,
+        R16G16B16A16 = 198,
+        Rn16Gn16Bn16An16 = 199,
+        Bf10gf11rf11 = 224,
+        An8bn8gn8rn8 = 215,
+        Rf16Gf16 = 222,
+        R16G16 = 218,
+        Rn16Gn16 = 219,
+        G8r8 = 234,
+        Gn8rn8 = 235,
+        Rn16 = 239,
+        Rn8 = 244,
+        A8 = 247,
+    }
+
+    /// <summary>
+    /// Memory layout of the destination texture (bit 0 of the SetDstMemoryLayout register).
+    /// </summary>
+    enum SetDstMemoryLayoutV
+    {
+        Blocklinear = 0,
+        Pitch = 1,
+    }
+
+    /// <summary>
+    /// Height in GOBs of the destination texture, stored as log2 of the GOB count (bits 4-6 of SetDstBlockSize).
+    /// </summary>
+    enum SetDstBlockSizeHeight
+    {
+        OneGob = 0,
+        TwoGobs = 1,
+        FourGobs = 2,
+        EightGobs = 3,
+        SixteenGobs = 4,
+        ThirtytwoGobs = 5,
+    }
+
+    /// <summary>
+    /// Depth in GOBs of the destination texture, stored as log2 of the GOB count (bits 8-10 of SetDstBlockSize).
+    /// </summary>
+    enum SetDstBlockSizeDepth
+    {
+        OneGob = 0,
+        TwoGobs = 1,
+        FourGobs = 2,
+        EightGobs = 3,
+        SixteenGobs = 4,
+        ThirtytwoGobs = 5,
+    }
+
+    /// <summary>
+    /// Format of the source texture (bits 0-7 of the SetSrcFormat register).
+    /// </summary>
+    enum SetSrcFormatV
+    {
+        A8r8g8b8 = 207,
+        A8rl8gl8bl8 = 208,
+        A2r10g10b10 = 223,
+        A8b8g8r8 = 213,
+        A8bl8gl8rl8 = 214,
+        A2b10g10r10 = 209,
+        X8r8g8b8 = 230,
+        X8rl8gl8bl8 = 231,
+        X8b8g8r8 = 249,
+        X8bl8gl8rl8 = 250,
+        R5g6b5 = 232,
+        A1r5g5b5 = 233,
+        X1r5g5b5 = 248,
+        Y8 = 243,
+        Ay8 = 29,
+        Y16 = 238,
+        Y32 = 255,
+        Z1r5g5b5 = 251,
+        O1r5g5b5 = 252,
+        Z8r8g8b8 = 253,
+        O8r8g8b8 = 254,
+        Y18x8 = 28,
+        Rf16 = 242,
+        Rf32 = 229,
+        Rf32Gf32 = 203,
+        Rf16Gf16Bf16Af16 = 202,
+        Rf16Gf16Bf16X16 = 206,
+        Rf32Gf32Bf32Af32 = 192,
+        Rf32Gf32Bf32X32 = 195,
+        R16G16B16A16 = 198,
+        Rn16Gn16Bn16An16 = 199,
+        Bf10gf11rf11 = 224,
+        An8bn8gn8rn8 = 215,
+        Rf16Gf16 = 222,
+        R16G16 = 218,
+        Rn16Gn16 = 219,
+        G8r8 = 234,
+        Gn8rn8 = 235,
+        Rn16 = 239,
+        Rn8 = 244,
+        A8 = 247,
+    }
+
+    /// <summary>
+    /// Memory layout of the source texture (bit 0 of the SetSrcMemoryLayout register).
+    /// </summary>
+    enum SetSrcMemoryLayoutV
+    {
+        Blocklinear = 0,
+        Pitch = 1,
+    }
+
+    /// <summary>
+    /// Height in GOBs of the source texture, stored as log2 of the GOB count (bits 4-6 of SetSrcBlockSize).
+    /// </summary>
+    enum SetSrcBlockSizeHeight
+    {
+        OneGob = 0,
+        TwoGobs = 1,
+        FourGobs = 2,
+        EightGobs = 3,
+        SixteenGobs = 4,
+        ThirtytwoGobs = 5,
+    }
+
+    /// <summary>
+    /// Depth in GOBs of the source texture, stored as log2 of the GOB count (bits 8-10 of SetSrcBlockSize).
+    /// </summary>
+    enum SetSrcBlockSizeDepth
+    {
+        OneGob = 0,
+        TwoGobs = 1,
+        FourGobs = 2,
+        EightGobs = 3,
+        SixteenGobs = 4,
+        ThirtytwoGobs = 5,
+    }
+
+    /// <summary>
+    /// Texture data caches to invalidate (bits 0-1 of the TwodInvalidateTextureDataCache register).
+    /// </summary>
+    enum TwodInvalidateTextureDataCacheV
+    {
+        L1Only = 0,
+        L2Only = 1,
+        L1AndL2 = 2,
+    }
+
+    /// <summary>
+    /// Sector promotion parameters (bits 0-1 of the SetPixelsFromMemorySectorPromotion register).
+    /// </summary>
+    enum SetPixelsFromMemorySectorPromotionV
+    {
+        NoPromotion = 0,
+        PromoteTo2V = 1,
+        PromoteTo2H = 2,
+        PromoteTo4 = 3,
+    }
+
+    /// <summary>
+    /// Number of processing clusters (bit 0 of the SetNumProcessingClusters register).
+    /// </summary>
+    enum SetNumProcessingClustersV
+    {
+        All = 0,
+        One = 1,
+    }
+
+    /// <summary>
+    /// Color key format (bits 0-2 of the SetColorKeyFormat register).
+    /// </summary>
+    enum SetColorKeyFormatV
+    {
+        A16r5g6b5 = 0,
+        A1r5g5b5 = 1,
+        A8r8g8b8 = 2,
+        A2r10g10b10 = 3,
+        Y8 = 4,
+        Y16 = 5,
+        Y32 = 6,
+    }
+
+    /// <summary>
+    /// Color blit operation (bits 0-2 of the SetOperation register).
+    /// </summary>
+    enum SetOperationV
+    {
+        SrccopyAnd = 0,
+        RopAnd = 1,
+        BlendAnd = 2,
+        Srccopy = 3,
+        Rop = 4,
+        SrccopyPremult = 5,
+        BlendPremult = 6,
+    }
+
+    /// <summary>
+    /// Texture pattern selection (bits 0-1 of the SetPatternSelect register).
+    /// </summary>
+    enum SetPatternSelectV
+    {
+        Monochrome8x8 = 0,
+        Monochrome64x1 = 1,
+        Monochrome1x64 = 2,
+        Color = 3,
+    }
+
+    /// <summary>
+    /// Render enable override mode (bits 0-1 of the SetRenderEnableOverride register).
+    /// </summary>
+    enum SetRenderEnableOverrideMode
+    {
+        UseRenderEnable = 0,
+        AlwaysRender = 1,
+        NeverRender = 2,
+    }
+
+    /// <summary>
+    /// Pixels from memory horizontal direction (bits 0-1 of the SetPixelsFromMemoryDirection register).
+    /// </summary>
+    enum SetPixelsFromMemoryDirectionHorizontal
+    {
+        HwDecides = 0,
+        LeftToRight = 1,
+        RightToLeft = 2,
+    }
+
+    /// <summary>
+    /// Pixels from memory vertical direction (bits 4-5 of the SetPixelsFromMemoryDirection register).
+    /// </summary>
+    enum SetPixelsFromMemoryDirectionVertical
+    {
+        HwDecides = 0,
+        TopToBottom = 1,
+        BottomToTop = 2,
+    }
+
+    /// <summary>
+    /// Color format of the monochrome pattern (bits 0-2 of the SetMonochromePatternColorFormat register).
+    /// </summary>
+    enum SetMonochromePatternColorFormatV
+    {
+        A8x8r5g6b5 = 0,
+        A1r5g5b5 = 1,
+        A8r8g8b8 = 2,
+        A8y8 = 3,
+        A8x8y16 = 4,
+        Y32 = 5,
+        ByteExpand = 6,
+    }
+
+    /// <summary>
+    /// Format of the monochrome pattern (bit 0 of the SetMonochromePatternFormat register).
+    /// </summary>
+    enum SetMonochromePatternFormatV
+    {
+        Cga6M1 = 0,
+        LeM1 = 1,
+    }
+
+    /// <summary>
+    /// DMA semaphore reduction operation (bits 0-2 of the MmeDmaReduction register).
+    /// </summary>
+    enum MmeDmaReductionReductionOp
+    {
+        RedAdd = 0,
+        RedMin = 1,
+        RedMax = 2,
+        RedInc = 3,
+        RedDec = 4,
+        RedAnd = 5,
+        RedOr = 6,
+        RedXor = 7,
+    }
+
+    /// <summary>
+    /// DMA semaphore reduction format (bits 4-5 of the MmeDmaReduction register).
+    /// </summary>
+    enum MmeDmaReductionReductionFormat
+    {
+        Unsigned = 0,
+        Signed = 1,
+    }
+
+    /// <summary>
+    /// DMA semaphore reduction size (bit 8 of the MmeDmaReduction register).
+    /// </summary>
+    enum MmeDmaReductionReductionSize
+    {
+        FourBytes = 0,
+        EightBytes = 1,
+    }
+
+    /// <summary>
+    /// Data FIFO size (bits 0-2 of the SetMmeDataFifoConfig register).
+    /// </summary>
+    enum SetMmeDataFifoConfigFifoSize
+    {
+        Size0kb = 0,
+        Size4kb = 1,
+        Size8kb = 2,
+        Size12kb = 3,
+        Size16kb = 4,
+    }
+
+    /// <summary>
+    /// Render solid primitive mode (bits 0-2 of the RenderSolidPrimMode register).
+    /// </summary>
+    enum RenderSolidPrimModeV
+    {
+        Points = 0,
+        Lines = 1,
+        Polyline = 2,
+        Triangles = 3,
+        Rects = 4,
+    }
+
+    /// <summary>
+    /// Render solid primitive color format (bits 0-7 of the SetRenderSolidPrimColorFormat register).
+    /// </summary>
+    enum SetRenderSolidPrimColorFormatV
+    {
+        Rf32Gf32Bf32Af32 = 192,
+        Rf16Gf16Bf16Af16 = 202,
+        Rf32Gf32 = 203,
+        A8r8g8b8 = 207,
+        A2r10g10b10 = 223,
+        A8b8g8r8 = 213,
+        A2b10g10r10 = 209,
+        X8r8g8b8 = 230,
+        X8b8g8r8 = 249,
+        R5g6b5 = 232,
+        A1r5g5b5 = 233,
+        X1r5g5b5 = 248,
+        Y8 = 243,
+        Y16 = 238,
+        Y32 = 255,
+        Z1r5g5b5 = 251,
+        O1r5g5b5 = 252,
+        Z8r8g8b8 = 253,
+        O8r8g8b8 = 254,
+    }
+
+    /// <summary>
+    /// Pixels from CPU data type (bit 0 of the SetPixelsFromCpuDataType register).
+    /// </summary>
+    enum SetPixelsFromCpuDataTypeV
+    {
+        Color = 0,
+        Index = 1,
+    }
+
+    /// <summary>
+    /// Pixels from CPU color format (bits 0-7 of the SetPixelsFromCpuColorFormat register).
+    /// </summary>
+    enum SetPixelsFromCpuColorFormatV
+    {
+        A8r8g8b8 = 207,
+        A2r10g10b10 = 223,
+        A8b8g8r8 = 213,
+        A2b10g10r10 = 209,
+        X8r8g8b8 = 230,
+        X8b8g8r8 = 249,
+        R5g6b5 = 232,
+        A1r5g5b5 = 233,
+        X1r5g5b5 = 248,
+        Y8 = 243,
+        Y16 = 238,
+        Y32 = 255,
+        Z1r5g5b5 = 251,
+        O1r5g5b5 = 252,
+        Z8r8g8b8 = 253,
+        O8r8g8b8 = 254,
+    }
+
+    /// <summary>
+    /// Pixels from CPU palette index format (bits 0-1 of the SetPixelsFromCpuIndexFormat register).
+    /// </summary>
+    enum SetPixelsFromCpuIndexFormatV
+    {
+        I1 = 0,
+        I4 = 1,
+        I8 = 2,
+    }
+
+    /// <summary>
+    /// Pixels from CPU monochrome format (bit 0 of the SetPixelsFromCpuMonoFormat register).
+    /// </summary>
+    enum SetPixelsFromCpuMonoFormatV
+    {
+        Cga6M1 = 0,
+        LeM1 = 1,
+    }
+
+    /// <summary>
+    /// Pixels from CPU wrap mode (bits 0-1 of the SetPixelsFromCpuWrap register).
+    /// </summary>
+    enum SetPixelsFromCpuWrapV
+    {
+        WrapPixel = 0,
+        WrapByte = 1,
+        WrapDword = 2,
+    }
+
+    /// <summary>
+    /// Pixels from CPU monochrome opacity (bit 0 of the SetPixelsFromCpuMonoOpacity register).
+    /// </summary>
+    enum SetPixelsFromCpuMonoOpacityV
+    {
+        Transparent = 0,
+        Opaque = 1,
+    }
+
+    /// <summary>
+    /// Pixels from memory block shape (Auto, or an explicit 8x8 / 16x4 shape).
+    /// </summary>
+    enum SetPixelsFromMemoryBlockShapeV
+    {
+        Auto = 0,
+        Shape8x8 = 1,
+        Shape16x4 = 2,
+    }
+
+    /// <summary>
+    /// Pixels from memory sample origin; the 2D blit offsets the source coordinates when this is Corner.
+    /// </summary>
+    enum SetPixelsFromMemorySampleModeOrigin
+    {
+        Center = 0,
+        Corner = 1,
+    }
+
+    /// <summary>
+    /// Pixels from memory filter mode; Bilinear makes the 2D blit request a linear filtered copy.
+    /// </summary>
+    enum SetPixelsFromMemorySampleModeFilter
+    {
+        Point = 0,
+        Bilinear = 1,
+    }
+
+    /// <summary>
+    /// Render solid primitive point coordinates (one X/Y pair; the 2D class state holds an array of 64 of them).
+    /// </summary>
+    struct RenderSolidPrimPoint
+    {
+#pragma warning disable CS0649
+        public uint SetX; // First word of the pair; the auto-generated name keeps a "Set" prefix that Y lacks
+        public uint Y;
+#pragma warning restore CS0649
+    }
+
+    /// <summary>
+    /// 2D class state.
+    /// </summary>
+    unsafe struct TwodClassState
+    {
+#pragma warning disable CS0649
+        public uint SetObject;
+        public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF);
+        public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F);
+        public fixed uint Reserved04[63];
+        public uint NoOperation;
+        public uint SetNotifyA;
+        public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0x1FFFFFF);
+        public uint SetNotifyB;
+        public uint Notify;
+        public NotifyType NotifyType => (NotifyType)(Notify);
+        public uint WaitForIdle;
+        public uint LoadMmeInstructionRamPointer;
+        public uint LoadMmeInstructionRam;
+        public uint LoadMmeStartAddressRamPointer;
+        public uint LoadMmeStartAddressRam;
+        public uint SetMmeShadowRamControl;
+        public SetMmeShadowRamControlMode SetMmeShadowRamControlMode => (SetMmeShadowRamControlMode)((SetMmeShadowRamControl >> 0) & 0x3);
+        public fixed uint Reserved128[2];
+        public uint SetGlobalRenderEnableA;
+        public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF);
+        public uint SetGlobalRenderEnableB;
+        public uint SetGlobalRenderEnableC;
+        public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7);
+        public uint SendGoIdle;
+        public uint PmTrigger;
+        public fixed uint Reserved144[3];
+        public uint SetInstrumentationMethodHeader;
+        public uint SetInstrumentationMethodData;
+        public fixed uint Reserved158[37];
+        public uint SetMmeSwitchState;
+        public bool SetMmeSwitchStateValid => (SetMmeSwitchState & 0x1) != 0;
+        public int SetMmeSwitchStateSaveMacro => (int)((SetMmeSwitchState >> 4) & 0xFF);
+        public int SetMmeSwitchStateRestoreMacro => (int)((SetMmeSwitchState >> 12) & 0xFF);
+        public fixed uint Reserved1F0[4];
+        public uint SetDstFormat;
+        public SetDstFormatV SetDstFormatV => (SetDstFormatV)((SetDstFormat >> 0) & 0xFF);
+        public uint SetDstMemoryLayout;
+        public SetDstMemoryLayoutV SetDstMemoryLayoutV => (SetDstMemoryLayoutV)((SetDstMemoryLayout >> 0) & 0x1);
+        public uint SetDstBlockSize;
+        public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0x7);
+        public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0x7);
+        public uint SetDstDepth;
+        public uint SetDstLayer;
+        public uint SetDstPitch;
+        public uint SetDstWidth;
+        public uint SetDstHeight;
+        public uint SetDstOffsetUpper;
+        public int SetDstOffsetUpperV => (int)((SetDstOffsetUpper >> 0) & 0xFF);
+        public uint SetDstOffsetLower;
+        public uint FlushAndInvalidateRopMiniCache;
+        public bool FlushAndInvalidateRopMiniCacheV => (FlushAndInvalidateRopMiniCache & 0x1) != 0;
+        public uint SetSpareNoop06;
+        public uint SetSrcFormat;
+        public SetSrcFormatV SetSrcFormatV => (SetSrcFormatV)((SetSrcFormat >> 0) & 0xFF);
+        public uint SetSrcMemoryLayout;
+        public SetSrcMemoryLayoutV SetSrcMemoryLayoutV => (SetSrcMemoryLayoutV)((SetSrcMemoryLayout >> 0) & 0x1);
+        public uint SetSrcBlockSize;
+        public SetSrcBlockSizeHeight SetSrcBlockSizeHeight => (SetSrcBlockSizeHeight)((SetSrcBlockSize >> 4) & 0x7);
+        public SetSrcBlockSizeDepth SetSrcBlockSizeDepth => (SetSrcBlockSizeDepth)((SetSrcBlockSize >> 8) & 0x7);
+        public uint SetSrcDepth;
+        public uint TwodInvalidateTextureDataCache;
+        public TwodInvalidateTextureDataCacheV TwodInvalidateTextureDataCacheV => (TwodInvalidateTextureDataCacheV)((TwodInvalidateTextureDataCache >> 0) & 0x3);
+        public uint SetSrcPitch;
+        public uint SetSrcWidth;
+        public uint SetSrcHeight;
+        public uint SetSrcOffsetUpper;
+        public int SetSrcOffsetUpperV => (int)((SetSrcOffsetUpper >> 0) & 0xFF);
+        public uint SetSrcOffsetLower;
+        public uint SetPixelsFromMemorySectorPromotion;
+        public SetPixelsFromMemorySectorPromotionV SetPixelsFromMemorySectorPromotionV => (SetPixelsFromMemorySectorPromotionV)((SetPixelsFromMemorySectorPromotion >> 0) & 0x3);
+        public uint SetSpareNoop12;
+        public uint SetNumProcessingClusters;
+        public SetNumProcessingClustersV SetNumProcessingClustersV => (SetNumProcessingClustersV)((SetNumProcessingClusters >> 0) & 0x1);
+        public uint SetRenderEnableA;
+        public int SetRenderEnableAOffsetUpper => (int)((SetRenderEnableA >> 0) & 0xFF);
+        public uint SetRenderEnableB;
+        public uint SetRenderEnableC;
+        public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7);
+        public uint SetSpareNoop08;
+        public uint SetSpareNoop01;
+        public uint SetSpareNoop11;
+        public uint SetSpareNoop07;
+        public uint SetClipX0;
+        public uint SetClipY0;
+        public uint SetClipWidth;
+        public uint SetClipHeight;
+        public uint SetClipEnable;
+        public bool SetClipEnableV => (SetClipEnable & 0x1) != 0;
+        public uint SetColorKeyFormat;
+        public SetColorKeyFormatV SetColorKeyFormatV => (SetColorKeyFormatV)((SetColorKeyFormat >> 0) & 0x7);
+        public uint SetColorKey;
+        public uint SetColorKeyEnable;
+        public bool SetColorKeyEnableV => (SetColorKeyEnable & 0x1) != 0;
+        public uint SetRop;
+        public int SetRopV => (int)((SetRop >> 0) & 0xFF);
+        public uint SetBeta1;
+        public uint SetBeta4;
+        public int SetBeta4B => (int)((SetBeta4 >> 0) & 0xFF);
+        public int SetBeta4G => (int)((SetBeta4 >> 8) & 0xFF);
+        public int SetBeta4R => (int)((SetBeta4 >> 16) & 0xFF);
+        public int SetBeta4A => (int)((SetBeta4 >> 24) & 0xFF);
+        public uint SetOperation;
+        public SetOperationV SetOperationV => (SetOperationV)((SetOperation >> 0) & 0x7);
+        public uint SetPatternOffset;
+        public int SetPatternOffsetX => (int)((SetPatternOffset >> 0) & 0x3F);
+        public int SetPatternOffsetY => (int)((SetPatternOffset >> 8) & 0x3F);
+        public uint SetPatternSelect;
+        public SetPatternSelectV SetPatternSelectV => (SetPatternSelectV)((SetPatternSelect >> 0) & 0x3);
+        public uint SetDstColorRenderToZetaSurface;
+        public bool SetDstColorRenderToZetaSurfaceV => (SetDstColorRenderToZetaSurface & 0x1) != 0;
+        public uint SetSpareNoop04;
+        public uint SetSpareNoop15;
+        public uint SetSpareNoop13;
+        public uint SetSpareNoop03;
+        public uint SetSpareNoop14;
+        public uint SetSpareNoop02;
+        public uint SetCompression;
+        public bool SetCompressionEnable => (SetCompression & 0x1) != 0;
+        public uint SetSpareNoop09;
+        public uint SetRenderEnableOverride;
+        public SetRenderEnableOverrideMode SetRenderEnableOverrideMode => (SetRenderEnableOverrideMode)((SetRenderEnableOverride >> 0) & 0x3);
+        public uint SetPixelsFromMemoryDirection;
+        public SetPixelsFromMemoryDirectionHorizontal SetPixelsFromMemoryDirectionHorizontal => (SetPixelsFromMemoryDirectionHorizontal)((SetPixelsFromMemoryDirection >> 0) & 0x3);
+        public SetPixelsFromMemoryDirectionVertical SetPixelsFromMemoryDirectionVertical => (SetPixelsFromMemoryDirectionVertical)((SetPixelsFromMemoryDirection >> 4) & 0x3);
+        public uint SetSpareNoop10;
+        public uint SetMonochromePatternColorFormat;
+        public SetMonochromePatternColorFormatV SetMonochromePatternColorFormatV => (SetMonochromePatternColorFormatV)((SetMonochromePatternColorFormat >> 0) & 0x7);
+        public uint SetMonochromePatternFormat;
+        public SetMonochromePatternFormatV SetMonochromePatternFormatV => (SetMonochromePatternFormatV)((SetMonochromePatternFormat >> 0) & 0x1);
+        public uint SetMonochromePatternColor0;
+        public uint SetMonochromePatternColor1;
+        public uint SetMonochromePattern0;
+        public uint SetMonochromePattern1;
+        public Array64<uint> ColorPatternX8r8g8b8;
+        public int ColorPatternX8r8g8b8B0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 0) & 0xFF);
+        public int ColorPatternX8r8g8b8G0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 8) & 0xFF);
+        public int ColorPatternX8r8g8b8R0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 16) & 0xFF);
+        public int ColorPatternX8r8g8b8Ignore0(int i) => (int)((ColorPatternX8r8g8b8[i] >> 24) & 0xFF);
+        public Array32<uint> ColorPatternR5g6b5;
+        public int ColorPatternR5g6b5B0(int i) => (int)((ColorPatternR5g6b5[i] >> 0) & 0x1F);
+        public int ColorPatternR5g6b5G0(int i) => (int)((ColorPatternR5g6b5[i] >> 5) & 0x3F);
+        public int ColorPatternR5g6b5R0(int i) => (int)((ColorPatternR5g6b5[i] >> 11) & 0x1F);
+        public int ColorPatternR5g6b5B1(int i) => (int)((ColorPatternR5g6b5[i] >> 16) & 0x1F);
+        public int ColorPatternR5g6b5G1(int i) => (int)((ColorPatternR5g6b5[i] >> 21) & 0x3F);
+        public int ColorPatternR5g6b5R1(int i) => (int)((ColorPatternR5g6b5[i] >> 27) & 0x1F);
+        public Array32<uint> ColorPatternX1r5g5b5;
+        public int ColorPatternX1r5g5b5B0(int i) => (int)((ColorPatternX1r5g5b5[i] >> 0) & 0x1F);
+        public int ColorPatternX1r5g5b5G0(int i) => (int)((ColorPatternX1r5g5b5[i] >> 5) & 0x1F);
+        public int ColorPatternX1r5g5b5R0(int i) => (int)((ColorPatternX1r5g5b5[i] >> 10) & 0x1F);
+        public bool ColorPatternX1r5g5b5Ignore0(int i) => (ColorPatternX1r5g5b5[i] & 0x8000) != 0;
+        public int ColorPatternX1r5g5b5B1(int i) => (int)((ColorPatternX1r5g5b5[i] >> 16) & 0x1F);
+        public int ColorPatternX1r5g5b5G1(int i) => (int)((ColorPatternX1r5g5b5[i] >> 21) & 0x1F);
+        public int ColorPatternX1r5g5b5R1(int i) => (int)((ColorPatternX1r5g5b5[i] >> 26) & 0x1F);
+        public bool ColorPatternX1r5g5b5Ignore1(int i) => (ColorPatternX1r5g5b5[i] & 0x80000000) != 0;
+        public Array16<uint> ColorPatternY8;
+        public int ColorPatternY8Y0(int i) => (int)((ColorPatternY8[i] >> 0) & 0xFF);
+        public int ColorPatternY8Y1(int i) => (int)((ColorPatternY8[i] >> 8) & 0xFF);
+        public int ColorPatternY8Y2(int i) => (int)((ColorPatternY8[i] >> 16) & 0xFF);
+        public int ColorPatternY8Y3(int i) => (int)((ColorPatternY8[i] >> 24) & 0xFF);
+        public uint SetRenderSolidPrimColor0;
+        public uint SetRenderSolidPrimColor1;
+        public uint SetRenderSolidPrimColor2;
+        public uint SetRenderSolidPrimColor3;
+        public uint SetMmeMemAddressA;
+        public int SetMmeMemAddressAUpper => (int)((SetMmeMemAddressA >> 0) & 0x1FFFFFF);
+        public uint SetMmeMemAddressB;
+        public uint SetMmeDataRamAddress;
+        public uint MmeDmaRead;
+        public uint MmeDmaReadFifoed;
+        public uint MmeDmaWrite;
+        public uint MmeDmaReduction;
+        public MmeDmaReductionReductionOp MmeDmaReductionReductionOp => (MmeDmaReductionReductionOp)((MmeDmaReduction >> 0) & 0x7);
+        public MmeDmaReductionReductionFormat MmeDmaReductionReductionFormat => (MmeDmaReductionReductionFormat)((MmeDmaReduction >> 4) & 0x3);
+        public MmeDmaReductionReductionSize MmeDmaReductionReductionSize => (MmeDmaReductionReductionSize)((MmeDmaReduction >> 8) & 0x1);
+        public uint MmeDmaSysmembar;
+        public bool MmeDmaSysmembarV => (MmeDmaSysmembar & 0x1) != 0;
+        public uint MmeDmaSync;
+        public uint SetMmeDataFifoConfig;
+        public SetMmeDataFifoConfigFifoSize SetMmeDataFifoConfigFifoSize => (SetMmeDataFifoConfigFifoSize)((SetMmeDataFifoConfig >> 0) & 0x7);
+        public fixed uint Reserved578[2];
+        public uint RenderSolidPrimMode;
+        public RenderSolidPrimModeV RenderSolidPrimModeV => (RenderSolidPrimModeV)((RenderSolidPrimMode >> 0) & 0x7);
+        public uint SetRenderSolidPrimColorFormat;
+        public SetRenderSolidPrimColorFormatV SetRenderSolidPrimColorFormatV => (SetRenderSolidPrimColorFormatV)((SetRenderSolidPrimColorFormat >> 0) & 0xFF);
+        public uint SetRenderSolidPrimColor;
+        public uint SetRenderSolidLineTieBreakBits;
+        public bool SetRenderSolidLineTieBreakBitsXmajXincYinc => (SetRenderSolidLineTieBreakBits & 0x1) != 0;
+        public bool SetRenderSolidLineTieBreakBitsXmajXdecYinc => (SetRenderSolidLineTieBreakBits & 0x10) != 0;
+        public bool SetRenderSolidLineTieBreakBitsYmajXincYinc => (SetRenderSolidLineTieBreakBits & 0x100) != 0;
+        public bool SetRenderSolidLineTieBreakBitsYmajXdecYinc => (SetRenderSolidLineTieBreakBits & 0x1000) != 0;
+        public fixed uint Reserved590[20];
+        public uint RenderSolidPrimPointXY;
+        public int RenderSolidPrimPointXYX => (int)((RenderSolidPrimPointXY >> 0) & 0xFFFF);
+        public int RenderSolidPrimPointXYY => (int)((RenderSolidPrimPointXY >> 16) & 0xFFFF);
+        public fixed uint Reserved5E4[7];
+        public Array64<RenderSolidPrimPoint> RenderSolidPrimPoint;
+        public uint SetPixelsFromCpuDataType;
+        public SetPixelsFromCpuDataTypeV SetPixelsFromCpuDataTypeV => (SetPixelsFromCpuDataTypeV)((SetPixelsFromCpuDataType >> 0) & 0x1);
+        public uint SetPixelsFromCpuColorFormat;
+        public SetPixelsFromCpuColorFormatV SetPixelsFromCpuColorFormatV => (SetPixelsFromCpuColorFormatV)((SetPixelsFromCpuColorFormat >> 0) & 0xFF);
+        public uint SetPixelsFromCpuIndexFormat;
+        public SetPixelsFromCpuIndexFormatV SetPixelsFromCpuIndexFormatV => (SetPixelsFromCpuIndexFormatV)((SetPixelsFromCpuIndexFormat >> 0) & 0x3);
+        public uint SetPixelsFromCpuMonoFormat;
+        public SetPixelsFromCpuMonoFormatV SetPixelsFromCpuMonoFormatV => (SetPixelsFromCpuMonoFormatV)((SetPixelsFromCpuMonoFormat >> 0) & 0x1);
+        public uint SetPixelsFromCpuWrap;
+        public SetPixelsFromCpuWrapV SetPixelsFromCpuWrapV => (SetPixelsFromCpuWrapV)((SetPixelsFromCpuWrap >> 0) & 0x3);
+        public uint SetPixelsFromCpuColor0;
+        public uint SetPixelsFromCpuColor1;
+        public uint SetPixelsFromCpuMonoOpacity;
+        public SetPixelsFromCpuMonoOpacityV SetPixelsFromCpuMonoOpacityV => (SetPixelsFromCpuMonoOpacityV)((SetPixelsFromCpuMonoOpacity >> 0) & 0x1);
+        public fixed uint Reserved820[6];
+        public uint SetPixelsFromCpuSrcWidth;
+        public uint SetPixelsFromCpuSrcHeight;
+        public uint SetPixelsFromCpuDxDuFrac;
+        public uint SetPixelsFromCpuDxDuInt;
+        public uint SetPixelsFromCpuDyDvFrac;
+        public uint SetPixelsFromCpuDyDvInt;
+        public uint SetPixelsFromCpuDstX0Frac;
+        public uint SetPixelsFromCpuDstX0Int;
+        public uint SetPixelsFromCpuDstY0Frac;
+        public uint SetPixelsFromCpuDstY0Int;
+        public uint PixelsFromCpuData;
+        public fixed uint Reserved864[3];
+        public uint SetBigEndianControl;
+        public bool SetBigEndianControlX32Swap1 => (SetBigEndianControl & 0x1) != 0;
+        public bool SetBigEndianControlX32Swap4 => (SetBigEndianControl & 0x2) != 0;
+        public bool SetBigEndianControlX32Swap8 => (SetBigEndianControl & 0x4) != 0;
+        public bool SetBigEndianControlX32Swap16 => (SetBigEndianControl & 0x8) != 0;
+        public bool SetBigEndianControlX16Swap1 => (SetBigEndianControl & 0x10) != 0;
+        public bool SetBigEndianControlX16Swap4 => (SetBigEndianControl & 0x20) != 0;
+        public bool SetBigEndianControlX16Swap8 => (SetBigEndianControl & 0x40) != 0;
+        public bool SetBigEndianControlX16Swap16 => (SetBigEndianControl & 0x80) != 0;
+        public bool SetBigEndianControlX8Swap1 => (SetBigEndianControl & 0x100) != 0;
+        public bool SetBigEndianControlX8Swap4 => (SetBigEndianControl & 0x200) != 0;
+        public bool SetBigEndianControlX8Swap8 => (SetBigEndianControl & 0x400) != 0;
+        public bool SetBigEndianControlX8Swap16 => (SetBigEndianControl & 0x800) != 0;
+        public bool SetBigEndianControlI1X8Cga6Swap1 => (SetBigEndianControl & 0x1000) != 0;
+        public bool SetBigEndianControlI1X8Cga6Swap4 => (SetBigEndianControl & 0x2000) != 0;
+        public bool SetBigEndianControlI1X8Cga6Swap8 => (SetBigEndianControl & 0x4000) != 0;
+        public bool SetBigEndianControlI1X8Cga6Swap16 => (SetBigEndianControl & 0x8000) != 0;
+        public bool SetBigEndianControlI1X8LeSwap1 => (SetBigEndianControl & 0x10000) != 0;
+        public bool SetBigEndianControlI1X8LeSwap4 => (SetBigEndianControl & 0x20000) != 0;
+        public bool SetBigEndianControlI1X8LeSwap8 => (SetBigEndianControl & 0x40000) != 0;
+        public bool SetBigEndianControlI1X8LeSwap16 => (SetBigEndianControl & 0x80000) != 0;
+        public bool SetBigEndianControlI4Swap1 => (SetBigEndianControl & 0x100000) != 0;
+        public bool SetBigEndianControlI4Swap4 => (SetBigEndianControl & 0x200000) != 0;
+        public bool SetBigEndianControlI4Swap8 => (SetBigEndianControl & 0x400000) != 0;
+        public bool SetBigEndianControlI4Swap16 => (SetBigEndianControl & 0x800000) != 0;
+        public bool SetBigEndianControlI8Swap1 => (SetBigEndianControl & 0x1000000) != 0;
+        public bool SetBigEndianControlI8Swap4 => (SetBigEndianControl & 0x2000000) != 0;
+        public bool SetBigEndianControlI8Swap8 => (SetBigEndianControl & 0x4000000) != 0;
+        public bool SetBigEndianControlI8Swap16 => (SetBigEndianControl & 0x8000000) != 0;
+        public bool SetBigEndianControlOverride => (SetBigEndianControl & 0x10000000) != 0;
+        public fixed uint Reserved874[3];
+        public uint SetPixelsFromMemoryBlockShape;
+        public SetPixelsFromMemoryBlockShapeV SetPixelsFromMemoryBlockShapeV => (SetPixelsFromMemoryBlockShapeV)((SetPixelsFromMemoryBlockShape >> 0) & 0x7);
+        public uint SetPixelsFromMemoryCorralSize;
+        public int SetPixelsFromMemoryCorralSizeV => (int)((SetPixelsFromMemoryCorralSize >> 0) & 0x3FF);
+        public uint SetPixelsFromMemorySafeOverlap;
+        public bool SetPixelsFromMemorySafeOverlapV => (SetPixelsFromMemorySafeOverlap & 0x1) != 0;
+        public uint SetPixelsFromMemorySampleMode;
+        public SetPixelsFromMemorySampleModeOrigin SetPixelsFromMemorySampleModeOrigin => (SetPixelsFromMemorySampleModeOrigin)((SetPixelsFromMemorySampleMode >> 0) & 0x1);
+        public SetPixelsFromMemorySampleModeFilter SetPixelsFromMemorySampleModeFilter => (SetPixelsFromMemorySampleModeFilter)((SetPixelsFromMemorySampleMode >> 4) & 0x1);
+        public fixed uint Reserved890[8];
+        public uint SetPixelsFromMemoryDstX0;
+        public uint SetPixelsFromMemoryDstY0;
+        public uint SetPixelsFromMemoryDstWidth;
+        public uint SetPixelsFromMemoryDstHeight;
+        public uint SetPixelsFromMemoryDuDxFrac;
+        public uint SetPixelsFromMemoryDuDxInt;
+        public uint SetPixelsFromMemoryDvDyFrac;
+        public uint SetPixelsFromMemoryDvDyInt;
+        public uint SetPixelsFromMemorySrcX0Frac;
+        public uint SetPixelsFromMemorySrcX0Int;
+        public uint SetPixelsFromMemorySrcY0Frac;
+        public uint PixelsFromMemorySrcY0Int;
+        public uint SetFalcon00;
+        public uint SetFalcon01;
+        public uint SetFalcon02;
+        public uint SetFalcon03;
+        public uint SetFalcon04;
+        public uint SetFalcon05;
+        public uint SetFalcon06;
+        public uint SetFalcon07;
+        public uint SetFalcon08;
+        public uint SetFalcon09;
+        public uint SetFalcon10;
+        public uint SetFalcon11;
+        public uint SetFalcon12;
+        public uint SetFalcon13;
+        public uint SetFalcon14;
+        public uint SetFalcon15;
+        public uint SetFalcon16;
+        public uint SetFalcon17;
+        public uint SetFalcon18;
+        public uint SetFalcon19;
+        public uint SetFalcon20;
+        public uint SetFalcon21;
+        public uint SetFalcon22;
+        public uint SetFalcon23;
+        public uint SetFalcon24;
+        public uint SetFalcon25;
+        public uint SetFalcon26;
+        public uint SetFalcon27;
+        public uint SetFalcon28;
+        public uint SetFalcon29;
+        public uint SetFalcon30;
+        public uint SetFalcon31;
+        public fixed uint Reserved960[291];
+        public uint MmeDmaWriteMethodBarrier;
+        public bool MmeDmaWriteMethodBarrierV => (MmeDmaWriteMethodBarrier & 0x1) != 0;
+        public fixed uint ReservedDF0[2436];
+        public MmeShadowScratch SetMmeShadowScratch;
+#pragma warning restore CS0649
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs b/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs
index 14bc27a96e..b247f99ff4 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs
@@ -425,22 +425,28 @@ namespace Ryujinx.Graphics.Gpu.Image
         /// <summary>
         /// Gets the texture descriptor for a given texture handle.
         /// </summary>
-        /// <param name="state">The current GPU state</param>
+        /// <param name="poolGpuVa">GPU virtual address of the texture pool</param>
+        /// <param name="bufferIndex">Index of the constant buffer with texture handles</param>
+        /// <param name="maximumId">Maximum ID of the texture pool</param>
         /// <param name="stageIndex">The stage number where the texture is bound</param>
         /// <param name="handle">The texture handle</param>
         /// <param name="cbufSlot">The texture handle's constant buffer slot</param>
         /// <returns>The texture descriptor for the specified texture</returns>
-        public TextureDescriptor GetTextureDescriptor(GpuState state, int stageIndex, int handle, int cbufSlot)
+        public TextureDescriptor GetTextureDescriptor(
+            ulong poolGpuVa,
+            int bufferIndex,
+            int maximumId,
+            int stageIndex,
+            int handle,
+            int cbufSlot)
         {
-            int textureBufferIndex = cbufSlot < 0 ? state.Get<int>(MethodOffset.TextureBufferIndex) : cbufSlot & SlotMask;
+            int textureBufferIndex = cbufSlot < 0 ? bufferIndex : cbufSlot & SlotMask;
             int packedId = ReadPackedId(stageIndex, handle, textureBufferIndex, textureBufferIndex);
             int textureId = UnpackTextureId(packedId);
 
-            var poolState = state.Get<PoolState>(MethodOffset.TexturePoolState);
+            ulong poolAddress = _channel.MemoryManager.Translate(poolGpuVa);
 
-            ulong poolAddress = _channel.MemoryManager.Translate(poolState.Address.Pack());
-
-            TexturePool texturePool = _texturePoolCache.FindOrCreate(_channel, poolAddress, poolState.MaximumId);
+            TexturePool texturePool = _texturePoolCache.FindOrCreate(_channel, poolAddress, maximumId);
 
             return texturePool.GetDescriptor(textureId);
         }
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
index 37a2219ffa..106dc8e894 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
@@ -753,21 +753,30 @@ namespace Ryujinx.Graphics.Gpu.Image
         /// </summary>
         /// <param name="memoryManager">GPU memory manager where the texture is mapped</param>
         /// <param name="tex">The texture information</param>
-        /// <param name="cbp">The copy buffer parameters</param>
-        /// <param name="swizzle">The copy buffer swizzle</param>
+        /// <param name="gpuVa">GPU virtual address of the texture</param>
+        /// <param name="bpp">Bytes per pixel</param>
+        /// <param name="stride">If <paramref name="linear"/> is true, should have the texture stride, otherwise ignored</param>
+        /// <param name="xCount">Number of pixels to be copied per line</param>
+        /// <param name="yCount">Number of lines to be copied</param>
         /// <param name="linear">True if the texture has a linear layout, false otherwise</param>
         /// <returns>A matching texture, or null if there is no match</returns>
-        public Texture FindTexture(MemoryManager memoryManager, CopyBufferTexture tex, CopyBufferParams cbp, CopyBufferSwizzle swizzle, bool linear)
+        public Texture FindTexture(
+            MemoryManager memoryManager,
+            CopyBufferTexture tex,
+            ulong gpuVa,
+            int bpp,
+            int stride,
+            int xCount,
+            int yCount,
+            bool linear)
         {
-            ulong address = memoryManager.Translate(cbp.DstAddress.Pack());
+            ulong address = memoryManager.Translate(gpuVa);
 
             if (address == MemoryManager.PteUnmapped)
             {
                 return null;
             }
 
-            int bpp = swizzle.UnpackDstComponentsCount() * swizzle.UnpackComponentSize();
-
             int addressMatches = _textures.FindOverlaps(address, ref _textureOverlaps);
 
             for (int i = 0; i < addressMatches; i++)
@@ -786,7 +795,7 @@ namespace Ryujinx.Graphics.Gpu.Image
                 {
                     // Size is not available for linear textures. Use the stride and end of the copy region instead.
 
-                    match = texture.Info.IsLinear && texture.Info.Stride == cbp.DstStride && tex.RegionY + cbp.YCount <= texture.Info.Height;
+                    match = texture.Info.IsLinear && texture.Info.Stride == stride && tex.RegionY + yCount <= texture.Info.Height;
                 }
                 else
                 {
@@ -794,7 +803,7 @@ namespace Ryujinx.Graphics.Gpu.Image
                     // Due to the way linear strided and block layouts work, widths can be multiplied by Bpp for comparison.
                     // Note: tex.Width is the aligned texture size. Prefer param.XCount, as the destination should be a texture with that exact size.
 
-                    bool sizeMatch = cbp.XCount * bpp == texture.Info.Width * format.BytesPerPixel && tex.Height == texture.Info.Height;
+                    bool sizeMatch = xCount * bpp == texture.Info.Width * format.BytesPerPixel && tex.Height == texture.Info.Height;
                     bool formatMatch = !texture.Info.IsLinear &&
                                         texture.Info.GobBlocksInY == tex.MemoryLayout.UnpackGobBlocksInY() &&
                                         texture.Info.GobBlocksInZ == tex.MemoryLayout.UnpackGobBlocksInZ();
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs
index 9cab343adb..fcc67f7236 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs
@@ -313,26 +313,36 @@ namespace Ryujinx.Graphics.Gpu.Image
         /// <summary>
         /// Gets a texture descriptor used on the compute pipeline.
         /// </summary>
-        /// <param name="state">Current GPU state</param>
+        /// <param name="poolGpuVa">GPU virtual address of the texture pool</param>
+        /// <param name="bufferIndex">Index of the constant buffer with texture handles</param>
+        /// <param name="maximumId">Maximum ID of the texture pool</param>
         /// <param name="handle">Shader "fake" handle of the texture</param>
         /// <param name="cbufSlot">Shader constant buffer slot of the texture</param>
         /// <returns>The texture descriptor</returns>
-        public TextureDescriptor GetComputeTextureDescriptor(GpuState state, int handle, int cbufSlot)
+        public TextureDescriptor GetComputeTextureDescriptor(ulong poolGpuVa, int bufferIndex, int maximumId, int handle, int cbufSlot)
         {
-            return _cpBindingsManager.GetTextureDescriptor(state, 0, handle, cbufSlot);
+            return _cpBindingsManager.GetTextureDescriptor(poolGpuVa, bufferIndex, maximumId, 0, handle, cbufSlot);
         }
 
         /// <summary>
         /// Gets a texture descriptor used on the graphics pipeline.
         /// </summary>
-        /// <param name="state">Current GPU state</param>
+        /// <param name="poolGpuVa">GPU virtual address of the texture pool</param>
+        /// <param name="bufferIndex">Index of the constant buffer with texture handles</param>
+        /// <param name="maximumId">Maximum ID of the texture pool</param>
         /// <param name="stageIndex">Index of the shader stage where the texture is bound</param>
         /// <param name="handle">Shader "fake" handle of the texture</param>
         /// <param name="cbufSlot">Shader constant buffer slot of the texture</param>
         /// <returns>The texture descriptor</returns>
-        public TextureDescriptor GetGraphicsTextureDescriptor(GpuState state, int stageIndex, int handle, int cbufSlot)
+        public TextureDescriptor GetGraphicsTextureDescriptor(
+            ulong poolGpuVa,
+            int bufferIndex,
+            int maximumId,
+            int stageIndex,
+            int handle,
+            int cbufSlot)
         {
-            return _gpBindingsManager.GetTextureDescriptor(state, stageIndex, handle, cbufSlot);
+            return _gpBindingsManager.GetTextureDescriptor(poolGpuVa, bufferIndex, maximumId, stageIndex, handle, cbufSlot);
         }
 
         /// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs b/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs
index b78cbdaa60..6c5116ba3d 100644
--- a/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs
@@ -244,10 +244,10 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// <param name="srcVa">GPU virtual address of the copy source</param>
         /// <param name="dstVa">GPU virtual address of the copy destination</param>
         /// <param name="size">Size in bytes of the copy</param>
-        public void CopyBuffer(MemoryManager memoryManager, GpuVa srcVa, GpuVa dstVa, ulong size)
+        public void CopyBuffer(MemoryManager memoryManager, ulong srcVa, ulong dstVa, ulong size)
         {
-            ulong srcAddress = TranslateAndCreateBuffer(memoryManager, srcVa.Pack(), size);
-            ulong dstAddress = TranslateAndCreateBuffer(memoryManager, dstVa.Pack(), size);
+            ulong srcAddress = TranslateAndCreateBuffer(memoryManager, srcVa, size);
+            ulong dstAddress = TranslateAndCreateBuffer(memoryManager, dstVa, size);
 
             Buffer srcBuffer = GetBuffer(srcAddress, size);
             Buffer dstBuffer = GetBuffer(dstAddress, size);
@@ -285,9 +285,9 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// <param name="gpuVa">GPU virtual address of the region to clear</param>
         /// <param name="size">Number of bytes to clear</param>
         /// <param name="value">Value to be written into the buffer</param>
-        public void ClearBuffer(MemoryManager memoryManager, GpuVa gpuVa, ulong size, uint value)
+        public void ClearBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size, uint value)
         {
-            ulong address = TranslateAndCreateBuffer(memoryManager, gpuVa.Pack(), size);
+            ulong address = TranslateAndCreateBuffer(memoryManager, gpuVa, size);
 
             Buffer buffer = GetBuffer(address, size);
 
diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
index 62368c1c45..7fb979f4a5 100644
--- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
@@ -1,6 +1,5 @@
 using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.GAL;
-using Ryujinx.Graphics.Gpu.State;
 using Ryujinx.Graphics.Shader;
 
 namespace Ryujinx.Graphics.Gpu.Shader
@@ -11,7 +10,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
     class GpuAccessor : TextureDescriptorCapableGpuAccessor, IGpuAccessor
     {
         private readonly GpuContext _context;
-        private readonly GpuState _state;
+        private readonly GpuChannel _channel;
+        private readonly GpuAccessorState _state;
         private readonly int _stageIndex;
         private readonly bool _compute;
         private readonly int _localSizeX;
@@ -24,11 +24,13 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// Creates a new instance of the GPU state accessor for graphics shader translation.
         /// </summary>
         /// <param name="context">GPU context</param>
+        /// <param name="channel">GPU channel</param>
         /// <param name="state">Current GPU state</param>
         /// <param name="stageIndex">Graphics shader stage index (0 = Vertex, 4 = Fragment)</param>
-        public GpuAccessor(GpuContext context, GpuState state, int stageIndex)
+        public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state, int stageIndex)
         {
             _context = context;
+            _channel = channel;
             _state = state;
             _stageIndex = stageIndex;
         }
@@ -37,6 +39,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// Creates a new instance of the GPU state accessor for compute shader translation.
         /// </summary>
         /// <param name="context">GPU context</param>
+        /// <param name="channel">GPU channel</param>
         /// <param name="state">Current GPU state</param>
         /// <param name="localSizeX">Local group size X of the compute shader</param>
         /// <param name="localSizeY">Local group size Y of the compute shader</param>
@@ -45,7 +48,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
         public GpuAccessor(
             GpuContext context,
-            GpuState state,
+            GpuChannel channel,
+            GpuAccessorState state,
             int localSizeX,
             int localSizeY,
             int localSizeZ,
@@ -53,6 +57,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
             int sharedMemorySize)
         {
             _context = context;
+            _channel = channel;
             _state = state;
             _compute = true;
             _localSizeX = localSizeX;
@@ -79,7 +84,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <returns>Data at the memory location</returns>
         public override T MemoryRead<T>(ulong address)
         {
-            return _state.Channel.MemoryManager.Read<T>(address);
+            return _channel.MemoryManager.Read<T>(address);
         }
 
         /// <summary>
@@ -89,7 +94,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <returns>True if the address is mapped, false otherwise</returns>
         public bool MemoryMapped(ulong address)
         {
-            return _state.Channel.MemoryManager.IsMapped(address);
+            return _channel.MemoryManager.IsMapped(address);
         }
 
         /// <summary>
@@ -129,8 +134,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
         public uint QueryConstantBufferUse()
         {
             return _compute
-                ? _state.Channel.BufferManager.GetComputeUniformBufferUseMask()
-                : _state.Channel.BufferManager.GetGraphicsUniformBufferUseMask(_stageIndex);
+                ? _channel.BufferManager.GetComputeUniformBufferUseMask()
+                : _channel.BufferManager.GetGraphicsUniformBufferUseMask(_stageIndex);
         }
 
         /// <summary>
@@ -196,11 +201,22 @@ namespace Ryujinx.Graphics.Gpu.Shader
         {
             if (_compute)
             {
-                return _state.Channel.TextureManager.GetComputeTextureDescriptor(_state, handle, cbufSlot);
+                return _channel.TextureManager.GetComputeTextureDescriptor(
+                    _state.TexturePoolGpuVa,
+                    _state.TextureBufferIndex,
+                    _state.TexturePoolMaximumId,
+                    handle,
+                    cbufSlot);
             }
             else
             {
-                return _state.Channel.TextureManager.GetGraphicsTextureDescriptor(_state, _stageIndex, handle, cbufSlot);
+                return _channel.TextureManager.GetGraphicsTextureDescriptor(
+                    _state.TexturePoolGpuVa,
+                    _state.TextureBufferIndex,
+                    _state.TexturePoolMaximumId,
+                    _stageIndex,
+                    handle,
+                    cbufSlot);
             }
         }
 
@@ -210,7 +226,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <returns>True if early depth testing is forced</returns>
         public bool QueryEarlyZForce()
         {
-            return _state.Get<bool>(MethodOffset.EarlyZForce);
+            return _state.EarlyZForce;
         }
     }
 }
diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs
new file mode 100644
index 0000000000..17660cf9ff
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs
@@ -0,0 +1,43 @@
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+    /// <summary>
+    /// Immutable snapshot of the GPU state read by the <see cref="GpuAccessor"/>.
+    /// </summary>
+    readonly struct GpuAccessorState
+    {
+        /// <summary>
+        /// GPU virtual address of the texture pool.
+        /// </summary>
+        public ulong TexturePoolGpuVa { get; }
+
+        /// <summary>
+        /// Maximum ID of the texture pool.
+        /// </summary>
+        public int TexturePoolMaximumId { get; }
+
+        /// <summary>
+        /// Constant buffer slot where the texture handles are located.
+        /// </summary>
+        public int TextureBufferIndex { get; }
+
+        /// <summary>
+        /// Early Z force enable.
+        /// </summary>
+        public bool EarlyZForce { get; }
+
+        /// <summary>
+        /// Creates a new instance of the GPU accessor state; all values are fixed after construction.
+        /// </summary>
+        /// <param name="texturePoolGpuVa">GPU virtual address of the texture pool</param>
+        /// <param name="texturePoolMaximumId">Maximum ID of the texture pool</param>
+        /// <param name="textureBufferIndex">Constant buffer slot where the texture handles are located</param>
+        /// <param name="earlyZForce">Early Z force enable</param>
+        public GpuAccessorState(ulong texturePoolGpuVa, int texturePoolMaximumId, int textureBufferIndex, bool earlyZForce)
+        {
+            TexturePoolGpuVa = texturePoolGpuVa;
+            TexturePoolMaximumId = texturePoolMaximumId;
+            TextureBufferIndex = textureBufferIndex;
+            EarlyZForce = earlyZForce;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index 46a6ba98d4..e9df6bfbd6 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -475,7 +475,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <remarks>
         /// This automatically translates, compiles and adds the code to the cache if not present.
         /// </remarks>
-        /// <param name="state">Current GPU state</param>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="gas">GPU accessor state</param>
         /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
         /// <param name="localSizeX">Local group size X of the computer shader</param>
         /// <param name="localSizeY">Local group size Y of the computer shader</param>
@@ -484,7 +485,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
         /// <returns>Compiled compute shader code</returns>
         public ShaderBundle GetComputeShader(
-            GpuState state,
+            GpuChannel channel,
+            GpuAccessorState gas,
             ulong gpuVa,
             int localSizeX,
             int localSizeY,
@@ -498,7 +500,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
             {
                 foreach (ShaderBundle cachedCpShader in list)
                 {
-                    if (IsShaderEqual(state.Channel.MemoryManager, cachedCpShader, gpuVa))
+                    if (IsShaderEqual(channel.MemoryManager, cachedCpShader, gpuVa))
                     {
                         return cachedCpShader;
                     }
@@ -508,7 +510,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
             TranslatorContext[] shaderContexts = new TranslatorContext[1];
 
             shaderContexts[0] = DecodeComputeShader(
-                state,
+                channel,
+                gas,
                 gpuVa,
                 localSizeX,
                 localSizeY,
@@ -533,7 +536,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 isShaderCacheReadOnly = _cacheManager.IsReadOnly;
 
                 // Compute hash and prepare data for shader disk cache comparison.
-                shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(state.Channel.MemoryManager, shaderContexts);
+                shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel.MemoryManager, shaderContexts);
                 programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries);
             }
 
@@ -548,7 +551,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 }
 
                 // The shader isn't currently cached, translate it and compile it.
-                ShaderCodeHolder shader = TranslateShader(state.Channel.MemoryManager, shaderContexts[0]);
+                ShaderCodeHolder shader = TranslateShader(channel.MemoryManager, shaderContexts[0]);
 
                 shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code);
 
@@ -832,7 +835,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <summary>
         /// Decode the binary Maxwell shader code to a translator context.
         /// </summary>
-        /// <param name="state">Current GPU state</param>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="gas">GPU accessor state</param>
         /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
         /// <param name="localSizeX">Local group size X of the computer shader</param>
         /// <param name="localSizeY">Local group size Y of the computer shader</param>
@@ -841,7 +845,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
         /// <returns>The generated translator context</returns>
         private TranslatorContext DecodeComputeShader(
-            GpuState state,
+            GpuChannel channel,
+            GpuAccessorState gas,
             ulong gpuVa,
             int localSizeX,
             int localSizeY,
@@ -854,7 +859,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 return null;
             }
 
-            GpuAccessor gpuAccessor = new GpuAccessor(_context, state, localSizeX, localSizeY, localSizeZ, localMemorySize, sharedMemorySize);
+            GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gas, localSizeX, localSizeY, localSizeZ, localMemorySize, sharedMemorySize);
 
             var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute);
             return Translator.CreateContext(gpuVa, gpuAccessor, options);
@@ -884,7 +889,13 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 return null;
             }
 
-            GpuAccessor gpuAccessor = new GpuAccessor(_context, state, (int)stage - 1);
+            GpuAccessorState gas = new GpuAccessorState(
+                state.Get<PoolState>(MethodOffset.TexturePoolState).Address.Pack(),
+                state.Get<PoolState>(MethodOffset.TexturePoolState).MaximumId,
+                state.Get<int>(MethodOffset.TextureBufferIndex),
+                state.Get<Boolean32>(MethodOffset.EarlyZForce));
+
+            GpuAccessor gpuAccessor = new GpuAccessor(_context, state.Channel, gas, (int)stage - 1);
 
             var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
             return Translator.CreateContext(gpuVa, gpuAccessor, options, counts);
diff --git a/Ryujinx.Graphics.Gpu/State/BufferSwizzleComponent.cs b/Ryujinx.Graphics.Gpu/State/BufferSwizzleComponent.cs
deleted file mode 100644
index 5c23cb2d5e..0000000000
--- a/Ryujinx.Graphics.Gpu/State/BufferSwizzleComponent.cs
+++ /dev/null
@@ -1,16 +0,0 @@
-namespace Ryujinx.Graphics.Gpu.State
-{
-    /// <summary>
-    /// Buffer swizzle component.
-    /// </summary>
-    enum BufferSwizzleComponent
-    {
-        SrcX,
-        SrcY,
-        SrcZ,
-        SrcW,
-        ConstA,
-        ConstB,
-        NoWrite
-    }
-}
diff --git a/Ryujinx.Graphics.Gpu/State/CopyBufferParams.cs b/Ryujinx.Graphics.Gpu/State/CopyBufferParams.cs
deleted file mode 100644
index 67c3e21485..0000000000
--- a/Ryujinx.Graphics.Gpu/State/CopyBufferParams.cs
+++ /dev/null
@@ -1,17 +0,0 @@
-namespace Ryujinx.Graphics.Gpu.State
-{
-    /// <summary>
-    /// Buffer to buffer copy parameters.
-    /// </summary>
-    struct CopyBufferParams
-    {
-#pragma warning disable CS0649
-        public GpuVa SrcAddress;
-        public GpuVa DstAddress;
-        public int   SrcStride;
-        public int   DstStride;
-        public int   XCount;
-        public int   YCount;
-#pragma warning restore CS0649
-    }
-}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/State/CopyBufferSwizzle.cs b/Ryujinx.Graphics.Gpu/State/CopyBufferSwizzle.cs
deleted file mode 100644
index 94b650c4e3..0000000000
--- a/Ryujinx.Graphics.Gpu/State/CopyBufferSwizzle.cs
+++ /dev/null
@@ -1,75 +0,0 @@
-namespace Ryujinx.Graphics.Gpu.State
-{
-    /// <summary>
-    /// Buffer to buffer copy vector swizzle parameters.
-    /// </summary>
-    struct CopyBufferSwizzle
-    {
-#pragma warning disable CS0649
-        public uint Swizzle;
-#pragma warning restore CS0649
-
-        /// <summary>
-        /// Unpacks the source for the buffer destination vector X component.
-        /// </summary>
-        /// <returns>Destination component</returns>
-        public BufferSwizzleComponent UnpackDstX()
-        {
-            return (BufferSwizzleComponent)(Swizzle & 7);
-        }
-
-        /// <summary>
-        /// Unpacks the source for the buffer destination vector Y component.
-        /// </summary>
-        /// <returns>Destination component</returns>
-        public BufferSwizzleComponent UnpackDstY()
-        {
-            return (BufferSwizzleComponent)((Swizzle >> 4) & 7);
-        }
-
-        /// <summary>
-        /// Unpacks the source for the buffer destination vector Z component.
-        /// </summary>
-        /// <returns>Destination component</returns>
-        public BufferSwizzleComponent UnpackDstZ()
-        {
-            return (BufferSwizzleComponent)((Swizzle >> 8) & 7);
-        }
-
-        /// <summary>
-        /// Unpacks the source for the buffer destination vector W component.
-        /// </summary>
-        /// <returns>Destination component</returns>
-        public BufferSwizzleComponent UnpackDstW()
-        {
-            return (BufferSwizzleComponent)((Swizzle >> 12) & 7);
-        }
-
-        /// <summary>
-        /// Unpacks the size of each vector component of the copy.
-        /// </summary>
-        /// <returns>Vector component size</returns>
-        public int UnpackComponentSize()
-        {
-            return (int)((Swizzle >> 16) & 3) + 1;
-        }
-
-        /// <summary>
-        /// Unpacks the number of components of the source vector of the copy.
-        /// </summary>
-        /// <returns>Number of vector components</returns>
-        public int UnpackSrcComponentsCount()
-        {
-            return (int)((Swizzle >> 20) & 7) + 1;
-        }
-
-        /// <summary>
-        /// Unpacks the number of components of the destination vector of the copy.
-        /// </summary>
-        /// <returns>Number of vector components</returns>
-        public int UnpackDstComponentsCount()
-        {
-            return (int)((Swizzle >> 24) & 7) + 1;
-        }
-    }
-}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/State/CopyTextureControl.cs b/Ryujinx.Graphics.Gpu/State/CopyTextureControl.cs
deleted file mode 100644
index d6256f6858..0000000000
--- a/Ryujinx.Graphics.Gpu/State/CopyTextureControl.cs
+++ /dev/null
@@ -1,22 +0,0 @@
-namespace Ryujinx.Graphics.Gpu.State
-{
-    /// <summary>
-    /// Texture to texture copy control.
-    /// </summary>
-    struct CopyTextureControl
-    {
-#pragma warning disable CS0649
-        public uint Packed;
-#pragma warning restore CS0649
-
-        public bool UnpackOriginCorner()
-        {
-            return (Packed & 1u) != 0;
-        }
-
-        public bool UnpackLinearFilter()
-        {
-            return (Packed & (1u << 4)) != 0;
-        }
-    }
-}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/State/GpuState.cs b/Ryujinx.Graphics.Gpu/State/GpuState.cs
index ff4d782969..0b209da706 100644
--- a/Ryujinx.Graphics.Gpu/State/GpuState.cs
+++ b/Ryujinx.Graphics.Gpu/State/GpuState.cs
@@ -1,4 +1,5 @@
-using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Image;
 using System;
 using System.Runtime.InteropServices;
 
@@ -33,6 +34,8 @@ namespace Ryujinx.Graphics.Gpu.State
 
         private readonly Register[] _registers;
 
+        private readonly IDeviceState _deviceState; // Optional replacement for the internal backing storage; null when none was supplied
+
         /// <summary>
         /// Gets or sets the shadow ram control used for this sub-channel.
         /// </summary>
@@ -47,9 +50,11 @@ namespace Ryujinx.Graphics.Gpu.State
         /// Creates a new instance of the GPU state.
         /// </summary>
         /// <param name="channel">Channel that the sub-channel state belongs to</param>
-        public GpuState(GpuChannel channel)
+        /// <param name="deviceState">Optional device state that will replace the internal backing storage</param>
+        public GpuState(GpuChannel channel, IDeviceState deviceState = null)
         {
             Channel = channel;
+            _deviceState = deviceState;
 
             _memory = new int[RegistersCount];
             _shadow = new int[RegistersCount];
@@ -107,16 +112,23 @@ namespace Ryujinx.Graphics.Gpu.State
                 }
             }
 
-            Register register = _registers[meth.Method];
-
-            if (_memory[meth.Method] != value)
+            if (_deviceState != null)
             {
-                _registers[(int)register.BaseOffset].Modified = true;
+                _deviceState.Write(meth.Method * 4, meth.Argument);
             }
+            else
+            {
+                Register register = _registers[meth.Method];
 
-            _memory[meth.Method] = value;
+                if (_memory[meth.Method] != value)
+                {
+                    _registers[(int)register.BaseOffset].Modified = true;
+                }
 
-            register.Callback?.Invoke(this, value);
+                _memory[meth.Method] = value;
+
+                register.Callback?.Invoke(this, value);
+            }
         }
 
         /// <summary>
@@ -126,6 +138,11 @@ namespace Ryujinx.Graphics.Gpu.State
         /// <returns>Data at the register</returns>
         public int Read(int offset)
         {
+            if (_deviceState != null) // A supplied device state takes the place of _memory for reads
+            {
+                return _deviceState.Read(offset * 4); // NOTE(review): register index scaled by 4, presumably to a byte offset - confirm against IDeviceState
+            }
+
             return _memory[offset];
         }
 
diff --git a/Ryujinx.Graphics.Gpu/State/SbDescriptor.cs b/Ryujinx.Graphics.Gpu/State/SbDescriptor.cs
new file mode 100644
index 0000000000..9723b71979
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/State/SbDescriptor.cs
@@ -0,0 +1,20 @@
+namespace Ryujinx.Graphics.Gpu.State
+{
+    /// <summary>
+    /// Storage buffer address and size information.
+    /// </summary>
+    struct SbDescriptor
+    {
+#pragma warning disable CS0649
+        public uint AddressLow; // Lower 32 bits of the address returned by PackAddress
+        public uint AddressHigh; // Upper 32 bits of the address returned by PackAddress
+        public int Size; // Size of the storage buffer; NOTE(review): units are not stated here, presumably bytes - confirm
+        public int Padding; // NOTE(review): not used by this struct; presumably filler that keeps the layout at 16 bytes - confirm
+#pragma warning restore CS0649
+
+        public ulong PackAddress() // Combines AddressLow and AddressHigh into one 64-bit address
+        {
+            return AddressLow | ((ulong)AddressHigh << 32); // Widen to ulong before shifting; a 32-bit operand would have its shift count masked to 0
+        }
+    }
+}