From 81cba3c3df7a1534c6690f69de025beeb6ebd1b9 Mon Sep 17 00:00:00 2001
From: Thog <me@thog.eu>
Date: Fri, 1 May 2020 23:18:42 +0200
Subject: [PATCH] nvservices: mitigate abort with heavy load on the GPU
 processing thread (#1173)

* nvservices: mitigate abort with heavy load on the GPU processing thread.

This should fix Mario Tennis and LM3 regressions with syncpoints.

NOTE: Mario Tennis seems to have another issue related to the texture
cache that happens randomly when starting a match.

PS: Also add a debug logger for all known ioctl call to facilitate
debugging and add a missing UpdateMin in EventSignal.

* Address LDj3SNuD's comment

* Address gdkchan's comment
---
 .../Services/Nv/NvDrvServices/NvDeviceFile.cs | 19 ++++--
 .../NvHostCtrl/NvHostCtrlDeviceFile.cs        | 27 ++++++---
 .../NvHostCtrl/Types/NvHostEvent.cs           | 60 ++++++++++++++++++-
 3 files changed, 90 insertions(+), 16 deletions(-)

diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs
index b6e6b8f8fa..fe3ae652eb 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs
@@ -1,7 +1,9 @@
-using Ryujinx.HLE.HOS.Kernel.Memory;
+using Ryujinx.Common.Logging;
+using Ryujinx.HLE.HOS.Kernel.Memory;
 using Ryujinx.HLE.HOS.Kernel.Process;
 using System;
 using System.Diagnostics;
+using System.Reflection;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 
@@ -50,11 +52,18 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices
         protected delegate NvInternalResult IoctlProcessorInline<T, T1>(ref T arguments, ref T1 inlineData);
         protected delegate NvInternalResult IoctlProcessorInlineSpan<T, T1>(ref T arguments, Span<T1> inlineData);
 
+        private static NvInternalResult PrintResult(MethodInfo info, NvInternalResult result)
+        {
+            Logger.PrintDebug(LogClass.ServiceNv, $"{info.Name} returned result {result}");
+
+            return result;
+        }
+
         protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessor<T> callback, Span<byte> arguments) where T : struct
         {
             Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
 
-            return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0]);
+            return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0]));
         }
 
         protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInline<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct
@@ -62,19 +71,19 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices
             Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
             Debug.Assert(inlineBuffer.Length == Unsafe.SizeOf<T1>());
 
-            return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], ref MemoryMarshal.Cast<byte, T1>(inlineBuffer)[0]);
+            return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], ref MemoryMarshal.Cast<byte, T1>(inlineBuffer)[0]));
         }
 
         protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessorSpan<T> callback, Span<byte> arguments) where T : struct
         {
-            return callback(MemoryMarshal.Cast<byte, T>(arguments));
+            return PrintResult(callback.Method, callback(MemoryMarshal.Cast<byte, T>(arguments)));
         }
 
         protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInlineSpan<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct
         {
             Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
 
-            return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], MemoryMarshal.Cast<byte, T1>(inlineBuffer));
+            return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], MemoryMarshal.Cast<byte, T1>(inlineBuffer)));
         }
 
         public abstract void Close();
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/NvHostCtrlDeviceFile.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/NvHostCtrlDeviceFile.cs
index 994b5b628b..5851278344 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/NvHostCtrlDeviceFile.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/NvHostCtrlDeviceFile.cs
@@ -310,6 +310,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
 
             hostEvent.State = NvHostEventState.Cancelled;
 
+            _device.System.HostSyncpoint.UpdateMin(hostEvent.Fence.Id);
+
             return NvInternalResult.Success;
         }
 
@@ -398,20 +400,29 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
                 hostEvent.State == NvHostEventState.Signaled  ||
                 hostEvent.State == NvHostEventState.Cancelled))
             {
-                hostEvent.Wait(_device.Gpu, fence);
+                bool timedOut = hostEvent.Wait(_device.Gpu, fence);
 
-                if (isWaitEventCmd)
+                if (timedOut)
                 {
-                    value = ((fence.Id & 0xfff) << 16) | 0x10000000;
+                    if (isWaitEventCmd)
+                    {
+                        value = ((fence.Id & 0xfff) << 16) | 0x10000000;
+                    }
+                    else
+                    {
+                        value = fence.Id << 4;
+                    }
+
+                    value |= eventIndex;
+
+                    result = NvInternalResult.TryAgain;
                 }
                 else
                 {
-                    value = fence.Id << 4;
+                    value = fence.Value;
+
+                    return NvInternalResult.Success;
                 }
-
-                value |= eventIndex;
-
-                result = NvInternalResult.TryAgain;
             }
             else
             {
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostEvent.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostEvent.cs
index e984fddf76..f62df8b3f0 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostEvent.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostEvent.cs
@@ -1,8 +1,10 @@
+using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.Gpu;
 using Ryujinx.Graphics.Gpu.Synchronization;
 using Ryujinx.HLE.HOS.Kernel.Threading;
 using Ryujinx.HLE.HOS.Services.Nv.Types;
 using System;
+using System.Threading;
 
 namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
 {
@@ -16,6 +18,15 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
         private NvHostSyncpt          _syncpointManager;
         private SyncpointWaiterHandle _waiterInformation;
 
+        private NvFence _previousFailingFence;
+        private uint    _failingCount;
+
+        /// <summary>
+        /// Max failing count until waiting on CPU.
+        /// FIXME: This seems enough for most of the cases, reduce if needed.
+        /// </summary>
+        private const uint FailingCountMax = 2;
+
         public NvHostEvent(NvHostSyncpt syncpointManager, uint eventId, Horizon system)
         {
             Fence.Id = 0;
@@ -27,11 +38,20 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
             _eventId = eventId;
 
             _syncpointManager = syncpointManager;
+
+            ResetFailingState();
+        }
+
+        private void ResetFailingState()
+        {
+            _previousFailingFence.Id    = NvFence.InvalidSyncPointId;
+            _previousFailingFence.Value = 0;
+            _failingCount               = 0;
         }
 
         public void Reset()
         {
-            Fence.Id    = NvFence.InvalidSyncPointId;
+            Fence.Id    = 0;
             Fence.Value = 0;
             State       = NvHostEventState.Available;
         }
@@ -52,6 +72,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
 
         private void GpuSignaled()
         {
+            ResetFailingState();
+
             Signal();
         }
 
@@ -61,18 +83,50 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
             {
                 gpuContext.Synchronization.UnregisterCallback(Fence.Id, _waiterInformation);
 
+                if (_previousFailingFence.Id == Fence.Id && _previousFailingFence.Value == Fence.Value)
+                {
+                    _failingCount++;
+                }
+                else
+                {
+                    _failingCount = 1;
+
+                    _previousFailingFence = Fence;
+                }
+
                 Signal();
             }
 
             Event.WritableEvent.Clear();
         }
 
-        public void Wait(GpuContext gpuContext, NvFence fence)
+        public bool Wait(GpuContext gpuContext, NvFence fence)
         {
             Fence = fence;
             State = NvHostEventState.Waiting;
 
-            _waiterInformation = gpuContext.Synchronization.RegisterCallbackOnSyncpoint(Fence.Id, Fence.Value, GpuSignaled);
+            // NOTE: nvservices code should always wait on the GPU side.
+            //       If we do this, we may get an abort or undefined behaviour when the GPU processing thread is blocked for a long period (for example, during shader compilation).
+            //       The reason for this is that the NVN code will try to wait until giving up.
+            //       This is done by trying to wait and signal multiple times until aborting after you are past the timeout.
+            //       As such, if it fails too many time, we enforce a wait on the CPU side indefinitely.
+            //       This allows to keep GPU and CPU in sync when we are slow.
+            if (_failingCount == FailingCountMax)
+            {
+                Logger.PrintWarning(LogClass.ServiceNv, "GPU processing thread is too slow, waiting on CPU...");
+
+                bool timedOut = Fence.Wait(gpuContext, Timeout.InfiniteTimeSpan);
+
+                GpuSignaled();
+
+                return timedOut;
+            }
+            else
+            {
+                _waiterInformation = gpuContext.Synchronization.RegisterCallbackOnSyncpoint(Fence.Id, Fence.Value, GpuSignaled);
+
+                return true;
+            }
         }
 
         public string DumpState(GpuContext gpuContext)