From 04dce402ac94679c5439038be1c8ce090e7ad4cb Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Mon, 12 Jul 2021 16:48:57 -0300
Subject: [PATCH] Implement a fast path for I2M transfers (#2467)

---
 .../Engine/Compute/ComputeClass.cs            |  9 ++++
 .../Engine/GPFifo/GPFifoProcessor.cs          | 53 +++++++++++++++++--
 .../InlineToMemory/InlineToMemoryClass.cs     | 20 +++++++
 .../Engine/Threed/ThreedClass.cs              |  9 ++++
 4 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
index ac8b1186bd..a0304308d8 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
@@ -68,6 +68,15 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
             _i2mClass.LaunchDma(ref Unsafe.As<ComputeClassState, InlineToMemoryClassState>(ref _state.State), argument);
         }
 
+        /// <summary>
+        /// Hands a span of data words to the Inline-to-Memory engine in a single call,
+        /// rather than one word per call.
+        /// </summary>
+        /// <remarks>The span is forwarded unchanged to <c>_i2mClass</c>.</remarks>
+        /// <param name="data">Data words to forward</param>
+        public void LoadInlineData(ReadOnlySpan<int> data)
+            => _i2mClass.LoadInlineData(data);
+
         /// <summary>
         /// Pushes a word of data to the Inline-to-Memory engine.
         /// </summary>
diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
index dd5e6fe57f..ea34d6cd1a 100644
--- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
@@ -18,6 +18,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         private const int MacrosCount = 0x80;
         private const int MacroIndexMask = MacrosCount - 1;
 
+        private const int LoadInlineDataMethodOffset = 0x6d; // Compared against _state.Method to detect inline data loads for the I2M fast path.
         private const int UniformBufferUpdateDataMethodOffset = 0x8e4;
 
         private readonly GpuChannel _channel;
@@ -78,6 +79,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
 
                 if (_state.MethodCount != 0)
                 {
+                    if (TryFastI2mBufferUpdate(commandBuffer, ref index))
+                    {
+                        continue;
+                    }
+
                     Send(_state.Method, command, _state.SubChannel, _state.MethodCount <= 1);
 
                     if (!_state.NonIncrementing)
@@ -123,6 +129,46 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
             _3dClass.FlushUboDirty();
         }
 
+        /// <summary>
+        /// Attempts to service a non-incrementing Inline-to-Memory data load in bulk.
+        /// On success, the data words are handed to the engine in a single call, and up to
+        /// <see cref="DmaState.MethodCount"/> command buffer entries are consumed.
+        /// </summary>
+        /// <param name="commandBuffer">Command buffer holding the data words</param>
+        /// <param name="offset">Index in <paramref name="commandBuffer"/> of the first data word, advanced on success</param>
+        /// <returns>True if the data was consumed by the fast path, false otherwise</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private bool TryFastI2mBufferUpdate(ReadOnlySpan<int> commandBuffer, ref int offset)
+        {
+            if (_state.Method != LoadInlineDataMethodOffset || !_state.NonIncrementing || _state.SubChannel > 2)
+            {
+                return false;
+            }
+
+            int wordsLeft = commandBuffer.Length - offset;
+            int wordsTaken = Math.Min(_state.MethodCount, wordsLeft);
+            ReadOnlySpan<int> words = commandBuffer.Slice(offset, wordsTaken);
+
+            switch (_state.SubChannel)
+            {
+                case 0:
+                    _3dClass.LoadInlineData(words);
+                    break;
+                case 1:
+                    _computeClass.LoadInlineData(words);
+                    break;
+                default: // Sub-channel 2.
+                    _i2mClass.LoadInlineData(words);
+                    break;
+            }
+
+            // One less than the consumed count, presumably because the caller's loop steps the index itself.
+            offset += wordsTaken - 1;
+            _state.MethodCount -= wordsTaken;
+
+            return true;
+        }
+
         /// <summary>
         /// Tries to perform a fast constant buffer data update.
         /// If successful, all data will be copied at once, and <see cref="CompressedMethod.MethodCount"/> + 1
@@ -132,13 +178,14 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         /// <param name="commandBuffer">Command buffer where <paramref name="meth"/> is contained</param>
         /// <param name="offset">Offset at <paramref name="commandBuffer"/> where <paramref name="meth"/> is located</param>
         /// <returns>True if the fast copy was successful, false otherwise</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private bool TryFastUniformBufferUpdate(CompressedMethod meth, ReadOnlySpan<int> commandBuffer, int offset)
         {
             int availableCount = commandBuffer.Length - offset;
 
-            if (meth.MethodCount < availableCount &&
-                meth.SecOp == SecOp.NonIncMethod &&
-                meth.MethodAddress == UniformBufferUpdateDataMethodOffset)
+            if (meth.MethodAddress == UniformBufferUpdateDataMethodOffset &&
+                meth.MethodCount < availableCount &&
+                meth.SecOp == SecOp.NonIncMethod)
             {
                 _3dClass.ConstantBufferUpdate(commandBuffer.Slice(offset + 1, meth.MethodCount));
 
diff --git a/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
index cb4133ec5d..186a464888 100644
--- a/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
@@ -127,6 +127,26 @@ namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
             _finished = false;
         }
 
+        /// <summary>
+        /// Appends a span of data words to the Inline-to-Memory transfer, unless it has already finished.
+        /// </summary>
+        /// <param name="data">Data words to append; words that do not fit in the buffer are ignored</param>
+        public void LoadInlineData(ReadOnlySpan<int> data)
+        {
+            if (_finished)
+            {
+                return;
+            }
+
+            int wordCount = Math.Min(data.Length, _buffer.Length - _offset);
+            data.Slice(0, wordCount).CopyTo(new Span<int>(_buffer, _offset, wordCount));
+            _offset += wordCount;
+            if (_offset * 4 >= _size)
+            {
+                FinishTransfer();
+            }
+        }
+
         /// <summary>
         /// Pushes a word of data to the Inline-to-Memory engine.
         /// </summary>
diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
index ad6a1d0e09..37c8fec2e6 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
@@ -183,6 +183,15 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
             _i2mClass.LaunchDma(ref Unsafe.As<ThreedClassState, InlineToMemoryClassState>(ref _state.State), argument);
         }
 
+        /// <summary>
+        /// Hands a span of data words to the Inline-to-Memory engine in a single call,
+        /// rather than one word per call.
+        /// </summary>
+        /// <remarks>The span is forwarded unchanged to <c>_i2mClass</c>.</remarks>
+        /// <param name="data">Data words to forward</param>
+        public void LoadInlineData(ReadOnlySpan<int> data)
+            => _i2mClass.LoadInlineData(data);
+
         /// <summary>
         /// Pushes a word of data to the Inline-to-Memory engine.
         /// </summary>