From cd48576f5846aa89a36bfc833e9de5dde9627aed Mon Sep 17 00:00:00 2001
From: riperiperi <rhy3756547@hotmail.com>
Date: Mon, 4 May 2020 03:24:59 +0100
Subject: [PATCH] Implement Counter Queue and Partial Host Conditional
 Rendering (#1167)

* Implementation of query queue and host conditional rendering

* Resolve some comments.

* Use overloads instead of passing object.

* Wake the consumer threads when incrementing syncpoints.

Also, do a busy loop when awaiting the counter for a blocking flush, rather than potentially sleeping the thread.

* Ensure there's a command between begin and end query.
---
 Ryujinx.Graphics.GAL/ICounterEvent.cs         |  11 +
 Ryujinx.Graphics.GAL/IPipeline.cs             |   4 +
 Ryujinx.Graphics.GAL/IRenderer.cs             |   6 +-
 Ryujinx.Graphics.Gpu/Engine/MethodClear.cs    |   9 +-
 .../Engine/MethodConditionalRendering.cs      |  86 +++++--
 Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs     |  21 +-
 .../Engine/MethodIncrementSyncpoint.cs        |   1 +
 Ryujinx.Graphics.Gpu/Engine/MethodReport.cs   |  54 +++--
 Ryujinx.Graphics.Gpu/Memory/CounterCache.cs   |  59 ++++-
 Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs  |   6 +-
 Ryujinx.Graphics.OpenGL/Counters.cs           |  77 -------
 Ryujinx.Graphics.OpenGL/Pipeline.cs           |  41 ++++
 .../Queries/BufferedQuery.cs                  | 105 +++++++++
 .../Queries/CounterQueue.cs                   | 209 ++++++++++++++++++
 .../Queries/CounterQueueEvent.cs              | 100 +++++++++
 Ryujinx.Graphics.OpenGL/Queries/Counters.cs   |  57 +++++
 Ryujinx.Graphics.OpenGL/Renderer.cs           |  14 +-
 17 files changed, 724 insertions(+), 136 deletions(-)
 create mode 100644 Ryujinx.Graphics.GAL/ICounterEvent.cs
 delete mode 100644 Ryujinx.Graphics.OpenGL/Counters.cs
 create mode 100644 Ryujinx.Graphics.OpenGL/Queries/BufferedQuery.cs
 create mode 100644 Ryujinx.Graphics.OpenGL/Queries/CounterQueue.cs
 create mode 100644 Ryujinx.Graphics.OpenGL/Queries/CounterQueueEvent.cs
 create mode 100644 Ryujinx.Graphics.OpenGL/Queries/Counters.cs

diff --git a/Ryujinx.Graphics.GAL/ICounterEvent.cs b/Ryujinx.Graphics.GAL/ICounterEvent.cs
new file mode 100644
index 0000000000..dfabec612d
--- /dev/null
+++ b/Ryujinx.Graphics.GAL/ICounterEvent.cs
@@ -0,0 +1,11 @@
+using System;
+
+namespace Ryujinx.Graphics.GAL
+{
+    public interface ICounterEvent : IDisposable
+    {
+        bool Invalid { get; set; }
+
+        void Flush();
+    }
+}
diff --git a/Ryujinx.Graphics.GAL/IPipeline.cs b/Ryujinx.Graphics.GAL/IPipeline.cs
index 290462fa8b..3bf7ab9348 100644
--- a/Ryujinx.Graphics.GAL/IPipeline.cs
+++ b/Ryujinx.Graphics.GAL/IPipeline.cs
@@ -75,5 +75,9 @@ namespace Ryujinx.Graphics.GAL
 
         void TextureBarrier();
         void TextureBarrierTiled();
+
+        bool TryHostConditionalRendering(ICounterEvent value, ulong compare, bool isEqual);
+        bool TryHostConditionalRendering(ICounterEvent value, ICounterEvent compare, bool isEqual);
+        void EndHostConditionalRendering();
     }
 }
diff --git a/Ryujinx.Graphics.GAL/IRenderer.cs b/Ryujinx.Graphics.GAL/IRenderer.cs
index 56856b2368..4a45f5cb4c 100644
--- a/Ryujinx.Graphics.GAL/IRenderer.cs
+++ b/Ryujinx.Graphics.GAL/IRenderer.cs
@@ -20,10 +20,12 @@ namespace Ryujinx.Graphics.GAL
 
         Capabilities GetCapabilities();
 
-        ulong GetCounter(CounterType type);
+        void UpdateCounters();
 
-        void Initialize();
+        ICounterEvent ReportCounter(CounterType type, EventHandler<ulong> resultHandler);
 
         void ResetCounter(CounterType type);
+
+        void Initialize();
     }
 }
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs b/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs
index a555015d48..a9552762fa 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs
@@ -13,7 +13,9 @@ namespace Ryujinx.Graphics.Gpu.Engine
         /// <param name="argument">Method call argument</param>
         private void Clear(GpuState state, int argument)
         {
-            if (!GetRenderEnable(state))
+            ConditionalRenderEnabled renderEnable = GetRenderEnable(state);
+
+            if (renderEnable == ConditionalRenderEnabled.False)
             {
                 return;
             }
@@ -68,6 +70,11 @@ namespace Ryujinx.Graphics.Gpu.Engine
             }
 
             UpdateRenderTargetState(state, useControl: true);
+
+            if (renderEnable == ConditionalRenderEnabled.Host)
+            {
+                _context.Renderer.Pipeline.EndHostConditionalRendering();
+            }
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs b/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs
index 4775de02f7..c8d47b9ff1 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs
@@ -1,26 +1,34 @@
 using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.Gpu.State;
 
 namespace Ryujinx.Graphics.Gpu.Engine
 {
     partial class Methods
     {
+        enum ConditionalRenderEnabled
+        {
+            False,
+            True,
+            Host
+        }
+
         /// <summary>
         /// Checks if draws and clears should be performed, according
         /// to currently set conditional rendering conditions.
         /// </summary>
         /// <param name="state">GPU state</param>
         /// <returns>True if rendering is enabled, false otherwise</returns>
-        private bool GetRenderEnable(GpuState state)
+        private ConditionalRenderEnabled GetRenderEnable(GpuState state)
         {
             ConditionState condState = state.Get<ConditionState>(MethodOffset.ConditionState);
 
             switch (condState.Condition)
             {
                 case Condition.Always:
-                    return true;
+                    return ConditionalRenderEnabled.True;
                 case Condition.Never:
-                    return false;
+                    return ConditionalRenderEnabled.False;
                 case Condition.ResultNonZero:
                     return CounterNonZero(condState.Address.Pack());
                 case Condition.Equal:
@@ -31,22 +39,32 @@ namespace Ryujinx.Graphics.Gpu.Engine
 
             Logger.PrintWarning(LogClass.Gpu, $"Invalid conditional render condition \"{condState.Condition}\".");
 
-            return true;
+            return ConditionalRenderEnabled.True;
         }
 
         /// <summary>
         /// Checks if the counter value at a given GPU memory address is non-zero.
         /// </summary>
         /// <param name="gpuVa">GPU virtual address of the counter value</param>
-        /// <returns>True if the value is not zero, false otherwise</returns>
-        private bool CounterNonZero(ulong gpuVa)
+        /// <returns>True if the value is not zero, false otherwise. Returns host if handling with host conditional rendering</returns>
+        private ConditionalRenderEnabled CounterNonZero(ulong gpuVa)
         {
-            if (!FindAndFlush(gpuVa))
+            ICounterEvent evt = _counterCache.FindEvent(gpuVa);
+
+            if (evt == null)
             {
-                return false;
+                return ConditionalRenderEnabled.False;
             }
 
-            return _context.MemoryAccessor.ReadUInt64(gpuVa) != 0;
+            if (_context.Renderer.Pipeline.TryHostConditionalRendering(evt, 0L, false))
+            {
+                return ConditionalRenderEnabled.Host;
+            }
+            else
+            {
+                evt.Flush();
+                return (_context.MemoryAccessor.ReadUInt64(gpuVa) != 0) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
+            }
         }
 
         /// <summary>
@@ -54,29 +72,57 @@ namespace Ryujinx.Graphics.Gpu.Engine
         /// </summary>
         /// <param name="gpuVa">GPU virtual address</param>
         /// <param name="isEqual">True to check if the values are equal, false to check if they are not equal</param>
-        /// <returns>True if the condition is met, false otherwise</returns>
-        private bool CounterCompare(ulong gpuVa, bool isEqual)
+        /// <returns>True if the condition is met, false otherwise. Returns host if handling with host conditional rendering</returns>
+        private ConditionalRenderEnabled CounterCompare(ulong gpuVa, bool isEqual)
         {
-            if (!FindAndFlush(gpuVa) && !FindAndFlush(gpuVa + 16))
+            ICounterEvent evt = FindEvent(gpuVa);
+            ICounterEvent evt2 = FindEvent(gpuVa + 16);
+
+            if (evt == null && evt2 == null)
             {
-                return false;
+                return ConditionalRenderEnabled.False;
             }
 
-            ulong x = _context.MemoryAccessor.ReadUInt64(gpuVa);
-            ulong y = _context.MemoryAccessor.ReadUInt64(gpuVa + 16);
+            bool useHost;
 
-            return isEqual ? x == y : x != y;
+            if (evt != null && evt2 == null)
+            {
+                useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, _context.MemoryAccessor.ReadUInt64(gpuVa + 16), isEqual);
+            }
+            else if (evt == null && evt2 != null)
+            {
+                useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryAccessor.ReadUInt64(gpuVa), isEqual);
+            }
+            else
+            {
+                useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, evt2, isEqual);
+            }
+
+            if (useHost)
+            {
+                return ConditionalRenderEnabled.Host;
+            }
+            else
+            {
+                evt?.Flush();
+                evt2?.Flush();
+
+                ulong x = _context.MemoryAccessor.ReadUInt64(gpuVa);
+                ulong y = _context.MemoryAccessor.ReadUInt64(gpuVa + 16);
+
+                return (isEqual ? x == y : x != y) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
+            }
         }
 
         /// <summary>
         /// Tries to find a counter that is supposed to be written at the specified address,
-        /// flushing if necessary.
+        /// returning the related event.
         /// </summary>
         /// <param name="gpuVa">GPU virtual address where the counter is supposed to be written</param>
-        /// <returns>True if a counter value is found at the specified address, false otherwise</returns>
-        private bool FindAndFlush(ulong gpuVa)
+        /// <returns>The counter event, or null if not present</returns>
+        private ICounterEvent FindEvent(ulong gpuVa)
         {
-            return _counterCache.Contains(gpuVa);
+            return _counterCache.FindEvent(gpuVa);
         }
     }
 }
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs b/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs
index 68131f6262..d70402e92c 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs
@@ -35,17 +35,22 @@ namespace Ryujinx.Graphics.Gpu.Engine
         /// <param name="argument">Method call argument</param>
         private void DrawEnd(GpuState state, int argument)
         {
-            bool renderEnable = GetRenderEnable(state);
+            ConditionalRenderEnabled renderEnable = GetRenderEnable(state);
 
-            if (!renderEnable || _instancedDrawPending)
+            if (renderEnable == ConditionalRenderEnabled.False || _instancedDrawPending)
             {
-                if (!renderEnable)
+                if (renderEnable == ConditionalRenderEnabled.False)
                 {
                     PerformDeferredDraws();
                 }
 
                 _drawIndexed = false;
 
+                if (renderEnable == ConditionalRenderEnabled.Host)
+                {
+                    _context.Renderer.Pipeline.EndHostConditionalRendering();
+                }
+
                 return;
             }
 
@@ -72,6 +77,11 @@ namespace Ryujinx.Graphics.Gpu.Engine
 
                 _drawIndexed = false;
 
+                if (renderEnable == ConditionalRenderEnabled.Host)
+                {
+                    _context.Renderer.Pipeline.EndHostConditionalRendering();
+                }
+
                 return;
             }
 
@@ -100,6 +110,11 @@ namespace Ryujinx.Graphics.Gpu.Engine
                     drawState.First,
                     firstInstance);
             }
+
+            if (renderEnable == ConditionalRenderEnabled.Host)
+            {
+                _context.Renderer.Pipeline.EndHostConditionalRendering();
+            }
         }
 
         /// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs b/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs
index 65742ba721..8fcfb9000e 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs
@@ -13,6 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
         {
             uint syncpointId = (uint)(argument) & 0xFFFF;
 
+            _context.Renderer.UpdateCounters(); // Poll the query counters, the game may want an updated result.
             _context.Synchronization.IncrementSyncpoint(syncpointId);
         }
     }
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodReport.cs b/Ryujinx.Graphics.Gpu/Engine/MethodReport.cs
index 997f55ff2d..e8efddeaa0 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodReport.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodReport.cs
@@ -64,23 +64,9 @@ namespace Ryujinx.Graphics.Gpu.Engine
         {
             CounterData counterData = new CounterData();
 
-            ulong counter = 0;
+            var rs = state.Get<ReportState>(MethodOffset.ReportState);
 
-            switch (type)
-            {
-                case ReportCounterType.Zero:
-                    counter = 0;
-                    break;
-                case ReportCounterType.SamplesPassed:
-                    counter = _context.Renderer.GetCounter(CounterType.SamplesPassed);
-                    break;
-                case ReportCounterType.PrimitivesGenerated:
-                    counter = _context.Renderer.GetCounter(CounterType.PrimitivesGenerated);
-                    break;
-                case ReportCounterType.TransformFeedbackPrimitivesWritten:
-                    counter = _context.Renderer.GetCounter(CounterType.TransformFeedbackPrimitivesWritten);
-                    break;
-            }
+            ulong gpuVa = rs.Address.Pack();
 
             ulong ticks = ConvertNanosecondsToTicks((ulong)PerformanceCounter.ElapsedNanoseconds);
 
@@ -91,18 +77,40 @@ namespace Ryujinx.Graphics.Gpu.Engine
                 ticks /= 256;
             }
 
-            counterData.Counter   = counter;
-            counterData.Timestamp = ticks;
+            ICounterEvent counter = null;
 
-            Span<CounterData> counterDataSpan = MemoryMarshal.CreateSpan(ref counterData, 1);
+            EventHandler<ulong> resultHandler = (object evt, ulong result) =>
+            {
+                counterData.Counter = result;
+                counterData.Timestamp = ticks;
 
-            Span<byte> data = MemoryMarshal.Cast<CounterData, byte>(counterDataSpan);
+                Span<CounterData> counterDataSpan = MemoryMarshal.CreateSpan(ref counterData, 1);
 
-            var rs = state.Get<ReportState>(MethodOffset.ReportState);
+                Span<byte> data = MemoryMarshal.Cast<CounterData, byte>(counterDataSpan);
 
-            _context.MemoryAccessor.Write(rs.Address.Pack(), data);
+                if (counter?.Invalid != true)
+                {
+                    _context.MemoryAccessor.Write(gpuVa, data);
+                }
+            };
 
-            _counterCache.AddOrUpdate(rs.Address.Pack());
+            switch (type)
+            {
+                case ReportCounterType.Zero:
+                    resultHandler(null, 0);
+                    break;
+                case ReportCounterType.SamplesPassed:
+                    counter = _context.Renderer.ReportCounter(CounterType.SamplesPassed, resultHandler);
+                    break;
+                case ReportCounterType.PrimitivesGenerated:
+                    counter = _context.Renderer.ReportCounter(CounterType.PrimitivesGenerated, resultHandler);
+                    break;
+                case ReportCounterType.TransformFeedbackPrimitivesWritten:
+                    counter = _context.Renderer.ReportCounter(CounterType.TransformFeedbackPrimitivesWritten, resultHandler);
+                    break;
+            }
+
+            _counterCache.AddOrUpdate(gpuVa, counter);
         }
 
         /// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Memory/CounterCache.cs b/Ryujinx.Graphics.Gpu/Memory/CounterCache.cs
index 3110cdcc49..90b9187b41 100644
--- a/Ryujinx.Graphics.Gpu/Memory/CounterCache.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/CounterCache.cs
@@ -1,4 +1,5 @@
-using System.Collections.Generic;
+using Ryujinx.Graphics.GAL;
+using System.Collections.Generic;
 
 namespace Ryujinx.Graphics.Gpu.Memory
 {
@@ -10,10 +11,12 @@ namespace Ryujinx.Graphics.Gpu.Memory
         private struct CounterEntry
         {
             public ulong Address { get; }
+            public ICounterEvent Event { get; }
 
-            public CounterEntry(ulong address)
+            public CounterEntry(ulong address, ICounterEvent evt)
             {
                 Address = address;
+                Event = evt;
             }
         }
 
@@ -31,11 +34,11 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// Adds a new counter to the counter cache, or updates a existing one.
         /// </summary>
         /// <param name="gpuVa">GPU virtual address where the counter will be written in memory</param>
-        public void AddOrUpdate(ulong gpuVa)
+        public void AddOrUpdate(ulong gpuVa, ICounterEvent evt)
         {
             int index = BinarySearch(gpuVa);
 
-            CounterEntry entry = new CounterEntry(gpuVa);
+            CounterEntry entry = new CounterEntry(gpuVa, evt);
 
             if (index < 0)
             {
@@ -76,6 +79,16 @@ namespace Ryujinx.Graphics.Gpu.Memory
                 count++;
             }
 
+            // Notify the removed counter events that their result should no longer be written out.
+            for (int i = 0; i < count; i++)
+            {
+                ICounterEvent evt = _items[index + i].Event;
+                if (evt != null)
+                {
+                    evt.Invalid = true;
+                }
+            }
+
             _items.RemoveRange(index, count);
         }
 
@@ -101,6 +114,44 @@ namespace Ryujinx.Graphics.Gpu.Memory
             return BinarySearch(gpuVa) >= 0;
         }
 
+        /// <summary>
+        /// Flushes the counter event registered at the specified GPU virtual memory address, if any.
+        /// </summary>
+        /// <param name="gpuVa">GPU virtual address</param>
+        /// <returns>True if a counter entry exists at the specified address, false otherwise</returns>
+        public bool FindAndFlush(ulong gpuVa)
+        {
+            int index = BinarySearch(gpuVa);
+
+            // BinarySearch is negative when the address is absent, so index 0 is a valid hit.
+            if (index < 0)
+            {
+                return false;
+            }
+
+            _items[index].Event?.Flush();
+
+            return true;
+        }
+
+        /// <summary>
+        /// Finds the counter event that would write to the specified GPU virtual memory address.
+        /// </summary>
+        /// <param name="gpuVa">GPU virtual address</param>
+        /// <returns>The counter event, or null if no entry (or no event) exists at the address</returns>
+        public ICounterEvent FindEvent(ulong gpuVa)
+        {
+            int index = BinarySearch(gpuVa);
+
+            // BinarySearch is negative when the address is absent, so index 0 is a valid hit.
+            if (index < 0)
+            {
+                return null;
+            }
+
+            return _items[index].Event;
+        }
+
         /// <summary>
         /// Performs binary search of an address on the list.
         /// </summary>
diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
index f4af86f312..6f9ee6a446 100644
--- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
@@ -181,13 +181,13 @@ namespace Ryujinx.Graphics.Gpu.Memory
         {
             lock (_pageTable)
             {
+                // Event handlers are not expected to be thread safe.
+                MemoryUnmapped?.Invoke(this, new UnmapEventArgs(va, size));
+
                 for (ulong offset = 0; offset < size; offset += PageSize)
                 {
                     SetPte(va + offset, PteUnmapped);
                 }
-
-                // Event handlers are not expected to be thread safe.
-                MemoryUnmapped?.Invoke(this, new UnmapEventArgs(va, size));
             }
         }
 
diff --git a/Ryujinx.Graphics.OpenGL/Counters.cs b/Ryujinx.Graphics.OpenGL/Counters.cs
deleted file mode 100644
index e82a040f0e..0000000000
--- a/Ryujinx.Graphics.OpenGL/Counters.cs
+++ /dev/null
@@ -1,77 +0,0 @@
-using OpenTK.Graphics.OpenGL;
-using Ryujinx.Graphics.GAL;
-using System;
-
-namespace Ryujinx.Graphics.OpenGL
-{
-    class Counters
-    {
-        private int[] _queryObjects;
-
-        private ulong[] _accumulatedCounters;
-
-        public Counters()
-        {
-            int count = Enum.GetNames(typeof(CounterType)).Length;
-
-            _queryObjects = new int[count];
-
-            _accumulatedCounters = new ulong[count];
-        }
-
-        public void Initialize()
-        {
-            for (int index = 0; index < _queryObjects.Length; index++)
-            {
-                int handle = GL.GenQuery();
-
-                _queryObjects[index] = handle;
-
-                CounterType type = (CounterType)index;
-
-                GL.BeginQuery(GetTarget(type), handle);
-            }
-        }
-
-        public ulong GetCounter(CounterType type)
-        {
-            UpdateAccumulatedCounter(type);
-
-            return _accumulatedCounters[(int)type];
-        }
-
-        public void ResetCounter(CounterType type)
-        {
-            UpdateAccumulatedCounter(type);
-
-            _accumulatedCounters[(int)type] = 0;
-        }
-
-        private void UpdateAccumulatedCounter(CounterType type)
-        {
-            int handle = _queryObjects[(int)type];
-
-            QueryTarget target = GetTarget(type);
-
-            GL.EndQuery(target);
-
-            GL.GetQueryObject(handle, GetQueryObjectParam.QueryResult, out long result);
-
-            _accumulatedCounters[(int)type] += (ulong)result;
-
-            GL.BeginQuery(target, handle);
-        }
-
-        private static QueryTarget GetTarget(CounterType type)
-        {
-            switch (type)
-            {
-                case CounterType.SamplesPassed:                      return QueryTarget.SamplesPassed;
-                case CounterType.PrimitivesGenerated:                return QueryTarget.PrimitivesGenerated;
-                case CounterType.TransformFeedbackPrimitivesWritten: return QueryTarget.TransformFeedbackPrimitivesWritten;
-            }
-
-            return QueryTarget.SamplesPassed;
-        }
-    }
-}
diff --git a/Ryujinx.Graphics.OpenGL/Pipeline.cs b/Ryujinx.Graphics.OpenGL/Pipeline.cs
index dc04805dac..80b0710889 100644
--- a/Ryujinx.Graphics.OpenGL/Pipeline.cs
+++ b/Ryujinx.Graphics.OpenGL/Pipeline.cs
@@ -1,6 +1,7 @@
 using OpenTK.Graphics.OpenGL;
 using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.OpenGL.Queries;
 using Ryujinx.Graphics.Shader;
 using System;
 
@@ -1023,6 +1024,46 @@ namespace Ryujinx.Graphics.OpenGL
             }
         }
 
+        public bool TryHostConditionalRendering(ICounterEvent value, ulong compare, bool isEqual)
+        {
+            if (value is CounterQueueEvent)
+            {
+                // Compare an event and a constant value.
+                CounterQueueEvent evt = (CounterQueueEvent)value;
+
+                // Easy host conditional rendering when the check matches what GL can do:
+                //  - Event is of type samples passed.
+                //  - Result is not a combination of multiple queries.
+                //  - Comparing against 0.
+                //  - Event has not already been flushed.
+
+                if (evt.Disposed)
+                {
+                    // If the event has been flushed, then just use the values on the CPU.
+                    // The query object may already be repurposed for another draw (eg. begin + end).
+                    return false; 
+                }
+
+                if (compare == 0 && evt.Type == QueryTarget.SamplesPassed && evt.ClearCounter)
+                {
+                    GL.BeginConditionalRender(evt.Query, isEqual ? ConditionalRenderType.QueryNoWaitInverted : ConditionalRenderType.QueryNoWait);
+                    return true;
+                }
+            }
+
+            return false; // The GPU will flush the queries to CPU and evaluate the condition there instead.
+        }
+
+        public bool TryHostConditionalRendering(ICounterEvent value, ICounterEvent compare, bool isEqual)
+        {
+            return false; // We don't currently have a way to compare two counters for conditional rendering.
+        }
+
+        public void EndHostConditionalRendering()
+        {
+            GL.EndConditionalRender();
+        }
+
         public void Dispose()
         {
             _framebuffer?.Dispose();
diff --git a/Ryujinx.Graphics.OpenGL/Queries/BufferedQuery.cs b/Ryujinx.Graphics.OpenGL/Queries/BufferedQuery.cs
new file mode 100644
index 0000000000..9750ec92ba
--- /dev/null
+++ b/Ryujinx.Graphics.OpenGL/Queries/BufferedQuery.cs
@@ -0,0 +1,105 @@
+using OpenTK.Graphics.OpenGL;
+using Ryujinx.Common.Logging;
+using System;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+namespace Ryujinx.Graphics.OpenGL.Queries
+{
+    class BufferedQuery : IDisposable
+    {
+        private const int MaxQueryRetries = 5000;
+        private const long DefaultValue = -1;
+
+        public int Query { get; }
+
+        private int _buffer;
+        private IntPtr _bufferMap;
+        private QueryTarget _type;
+
+        public BufferedQuery(QueryTarget type)
+        {
+            _buffer = GL.GenBuffer();
+            Query = GL.GenQuery();
+            _type = type;
+
+            GL.BindBuffer(BufferTarget.QueryBuffer, _buffer);
+
+            unsafe
+            {
+                long defaultValue = DefaultValue;
+                GL.BufferStorage(BufferTarget.QueryBuffer, sizeof(long), (IntPtr)(&defaultValue), BufferStorageFlags.MapReadBit | BufferStorageFlags.MapWriteBit | BufferStorageFlags.MapPersistentBit);
+            }
+            _bufferMap = GL.MapBufferRange(BufferTarget.QueryBuffer, IntPtr.Zero, sizeof(long), BufferAccessMask.MapReadBit | BufferAccessMask.MapWriteBit | BufferAccessMask.MapPersistentBit);
+        }
+
+        public void Reset()
+        {
+            GL.EndQuery(_type);
+            GL.BeginQuery(_type, Query);
+        }
+
+        public void Begin()
+        {
+            GL.BeginQuery(_type, Query);
+        }
+
+        public unsafe void End()
+        {
+            GL.Flush();
+            GL.EndQuery(_type);
+            GL.BindBuffer(BufferTarget.QueryBuffer, _buffer);
+
+            // Re-arm the sentinel that TryGetResult and AwaitResult compare against.
+            Marshal.WriteInt64(_bufferMap, DefaultValue);
+            GL.GetQueryObject(Query, GetQueryObjectParam.QueryResult, (long*)0);
+        }
+
+        public bool TryGetResult(out long result)
+        {
+            result = Marshal.ReadInt64(_bufferMap);
+
+            return result != DefaultValue;
+        }
+
+        public long AwaitResult(AutoResetEvent wakeSignal = null)
+        {
+            long data = DefaultValue;
+
+            if (wakeSignal == null)
+            {
+                while (data == DefaultValue)
+                {
+                    data = Marshal.ReadInt64(_bufferMap);
+                }
+            }
+            else
+            {
+                for (int tries = 0; data == DefaultValue && tries < MaxQueryRetries; tries++)
+                {
+                    data = Marshal.ReadInt64(_bufferMap);
+                    if (data == DefaultValue)
+                    {
+                        wakeSignal.WaitOne(1);
+                    }
+                }
+
+                // Test the value, not the retry count: a result read on the last try is not a timeout.
+                if (data == DefaultValue)
+                {
+                    Logger.PrintError(LogClass.Gpu, $"Error: Query result timed out. Took more than {MaxQueryRetries} tries.");
+                }
+            }
+
+            return data;
+        }
+
+        public void Dispose()
+        {
+            GL.BindBuffer(BufferTarget.QueryBuffer, _buffer);
+            GL.UnmapBuffer(BufferTarget.QueryBuffer);
+            GL.DeleteBuffer(_buffer);
+            GL.DeleteQuery(Query);
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.OpenGL/Queries/CounterQueue.cs b/Ryujinx.Graphics.OpenGL/Queries/CounterQueue.cs
new file mode 100644
index 0000000000..f34bc86d6e
--- /dev/null
+++ b/Ryujinx.Graphics.OpenGL/Queries/CounterQueue.cs
@@ -0,0 +1,209 @@
+using OpenTK.Graphics.OpenGL;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Collections.Generic;
+using System.Threading;
+
+namespace Ryujinx.Graphics.OpenGL.Queries
+{
+    /// <summary>
+    /// Queue of pending counter reports for a single <see cref="CounterType"/>.
+    /// </summary>
+    /// <remarks>
+    /// One event is always "current" (its query is recording). Reporting ends that query, queues
+    /// the event and starts a new current one. Queued events are consumed in order, either by a
+    /// dedicated consumer thread or by a caller that flushes the queue, and each consumed result
+    /// is added to a running accumulated counter.
+    /// </remarks>
+    class CounterQueue : IDisposable
+    {
+        private const int QueryPoolInitialSize = 100;
+
+        /// <summary>Counter type this queue reports.</summary>
+        public CounterType Type { get; }
+
+        /// <summary>True once <see cref="Dispose"/> has run; also stops the consumer thread loop.</summary>
+        public bool Disposed { get; private set; }
+
+        private readonly Queue<CounterQueueEvent> _events = new Queue<CounterQueueEvent>();
+        private CounterQueueEvent _current;
+
+        private ulong _accumulatedCounter;
+
+        // Guards _events, _queryPool and _current.
+        private readonly object _lock = new object();
+
+        private readonly Queue<BufferedQuery> _queryPool;
+
+        // Set when an event is queued (or on dispose) to wake the consumer thread when it is idle.
+        private readonly AutoResetEvent _queuedEvent = new AutoResetEvent(false);
+
+        // Passed to the event being consumed by the consumer thread, set by non-blocking flushes.
+        private readonly AutoResetEvent _wakeSignal = new AutoResetEvent(false);
+
+        private readonly Thread _consumerThread;
+
+        /// <summary>
+        /// Creates the queue, pre-allocates the query pool, begins the first event and starts
+        /// the consumer thread.
+        /// </summary>
+        /// <param name="type">Counter type to report</param>
+        internal CounterQueue(CounterType type)
+        {
+            Type = type;
+
+            QueryTarget glType = GetTarget(Type);
+
+            _queryPool = new Queue<BufferedQuery>(QueryPoolInitialSize);
+            for (int i = 0; i < QueryPoolInitialSize; i++)
+            {
+                _queryPool.Enqueue(new BufferedQuery(glType));
+            }
+
+            _current = new CounterQueueEvent(this, glType);
+
+            _consumerThread = new Thread(EventConsumer);
+            _consumerThread.Start();
+        }
+
+        /// <summary>
+        /// Consumer thread body: takes queued events one at a time and blocks on each until its
+        /// result has been consumed. Sleeps on the queued signal while the queue is empty.
+        /// </summary>
+        private void EventConsumer()
+        {
+            while (!Disposed)
+            {
+                CounterQueueEvent evt = null;
+                lock (_lock)
+                {
+                    if (_events.Count > 0)
+                    {
+                        evt = _events.Dequeue();
+                    }
+                }
+
+                if (evt == null)
+                {
+                    _queuedEvent.WaitOne(); // No more events to go through, wait for more.
+                }
+                else
+                {
+                    evt.TryConsume(ref _accumulatedCounter, true, _wakeSignal);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Takes a query object from the pool, creating a new one if the pool is empty.
+        /// </summary>
+        /// <returns>A query object for this queue's counter type</returns>
+        internal BufferedQuery GetQueryObject()
+        {
+            // Creating/disposing query objects on a context we're sharing with will cause issues.
+            // So instead, make a lot of query objects on the main thread and reuse them.
+
+            lock (_lock)
+            {
+                if (_queryPool.Count > 0)
+                {
+                    return _queryPool.Dequeue();
+                }
+
+                return new BufferedQuery(GetTarget(Type));
+            }
+        }
+
+        /// <summary>
+        /// Puts a query object back into the pool so that a later event can reuse it.
+        /// </summary>
+        /// <param name="query">Query object to return</param>
+        internal void ReturnQueryObject(BufferedQuery query)
+        {
+            lock (_lock)
+            {
+                _queryPool.Enqueue(query);
+            }
+        }
+
+        /// <summary>
+        /// Ends the current event's query, queues the event for consumption and begins a new
+        /// current event.
+        /// </summary>
+        /// <param name="resultHandler">Handler attached to the queued event's result notification</param>
+        /// <returns>The event that was queued</returns>
+        public CounterQueueEvent QueueReport(EventHandler<ulong> resultHandler)
+        {
+            CounterQueueEvent result;
+
+            lock (_lock)
+            {
+                _current.Complete();
+                _events.Enqueue(_current);
+
+                // The consumer needs the lock to dequeue, so the handler is attached before
+                // the event can be consumed.
+                result = _current;
+                result.OnResult += resultHandler;
+
+                _current = new CounterQueueEvent(this, GetTarget(Type));
+            }
+
+            _queuedEvent.Set();
+
+            return result;
+        }
+
+        /// <summary>
+        /// Clears the current event, which restarts its query and makes the accumulated counter
+        /// start again from zero when that event is consumed.
+        /// </summary>
+        public void QueueReset()
+        {
+            lock (_lock)
+            {
+                _current.Clear();
+            }
+        }
+
+        /// <summary>
+        /// Maps a counter type to the GL query target used to measure it.
+        /// </summary>
+        /// <param name="type">Counter type</param>
+        /// <returns>The matching query target; unhandled types fall back to samples passed</returns>
+        private static QueryTarget GetTarget(CounterType type)
+        {
+            switch (type)
+            {
+                case CounterType.SamplesPassed: return QueryTarget.SamplesPassed;
+                case CounterType.PrimitivesGenerated: return QueryTarget.PrimitivesGenerated;
+                case CounterType.TransformFeedbackPrimitivesWritten: return QueryTarget.TransformFeedbackPrimitivesWritten;
+            }
+
+            return QueryTarget.SamplesPassed;
+        }
+
+        /// <summary>
+        /// Flushes the queue.
+        /// </summary>
+        /// <param name="blocking">
+        /// False to only signal the consumer thread. True to consume every queued event on the
+        /// calling thread, waiting for each result.
+        /// </param>
+        public void Flush(bool blocking)
+        {
+            if (!blocking)
+            {
+                // Just wake the consumer thread - it will update the queries.
+                _wakeSignal.Set();
+                return;
+            }
+
+            lock (_lock)
+            {
+                // Tell the queue to process all events.
+                while (_events.Count > 0)
+                {
+                    CounterQueueEvent flush = _events.Peek();
+                    if (!flush.TryConsume(ref _accumulatedCounter, true))
+                    {
+                        // A blocking consume waits for the result instead of reporting "not ready",
+                        // so this is only a safeguard: leave the event queued if it was not consumed.
+                        return;
+                    }
+                    _events.Dequeue();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Consumes queued events on the calling thread, in order, up to and including the given one.
+        /// </summary>
+        /// <remarks>
+        /// Does nothing if the event is already disposed. If the event is not in the queue, every
+        /// queued event ends up being consumed, since the loop only stops early when it finds it.
+        /// </remarks>
+        /// <param name="evt">Event to flush to</param>
+        public void FlushTo(CounterQueueEvent evt)
+        {
+            lock (_lock)
+            {
+                if (evt.Disposed)
+                {
+                    return;
+                }
+
+                // Tell the queue to process all events up to this one.
+                while (_events.Count > 0)
+                {
+                    CounterQueueEvent flush = _events.Dequeue();
+                    flush.TryConsume(ref _accumulatedCounter, true);
+
+                    if (flush == evt)
+                    {
+                        return;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Disposes all queued events, stops the consumer thread and releases the pooled query
+        /// objects and wait handles. Calling it more than once has no further effect.
+        /// </summary>
+        public void Dispose()
+        {
+            lock (_lock)
+            {
+                if (Disposed)
+                {
+                    // Already torn down: the pool and wait handles must not be released twice.
+                    return;
+                }
+
+                while (_events.Count > 0)
+                {
+                    CounterQueueEvent evt = _events.Dequeue();
+
+                    evt.Dispose();
+                }
+
+                Disposed = true;
+            }
+
+            // Wake the consumer thread in case it is idle, so it can observe Disposed and exit.
+            _queuedEvent.Set();
+
+            _consumerThread.Join();
+
+            // The event that is still recording was never queued, so nothing else would hand its
+            // query back to the pool. Dispose it here so that query is released with the others.
+            _current.Dispose();
+
+            foreach (BufferedQuery query in _queryPool)
+            {
+                query.Dispose();
+            }
+
+            // The consumer thread has exited, so nothing in this class waits on these any more.
+            _queuedEvent.Dispose();
+            _wakeSignal.Dispose();
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.OpenGL/Queries/CounterQueueEvent.cs b/Ryujinx.Graphics.OpenGL/Queries/CounterQueueEvent.cs
new file mode 100644
index 0000000000..0fba24a13b
--- /dev/null
+++ b/Ryujinx.Graphics.OpenGL/Queries/CounterQueueEvent.cs
@@ -0,0 +1,100 @@
+using OpenTK.Graphics.OpenGL;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Threading;
+
+namespace Ryujinx.Graphics.OpenGL.Queries
+{
+    /// <summary>
+    /// A single counter report: owns a pooled query object from when it is created until it is
+    /// consumed or disposed, at which point the query goes back to the owning queue's pool.
+    /// </summary>
+    class CounterQueueEvent : ICounterEvent
+    {
+        /// <summary>Raised from <see cref="TryConsume"/> with the accumulated counter value.</summary>
+        public event EventHandler<ulong> OnResult;
+
+        public QueryTarget Type { get; }
+
+        /// <summary>True if the accumulated counter must restart from zero when this event is consumed.</summary>
+        public bool ClearCounter { get; private set; }
+
+        public int Query => _counter.Query;
+
+        public bool Disposed { get; private set; }
+        public bool Invalid { get; set; }
+
+        private readonly CounterQueue _queue;
+        private readonly BufferedQuery _counter;
+
+        private readonly object _lock = new object();
+
+        // 0 until the query object has been handed back to the pool, 1 afterwards.
+        private int _queryReturned;
+
+        /// <summary>
+        /// Creates the event, takes a query object from the queue's pool and begins the query.
+        /// </summary>
+        /// <param name="queue">Queue that owns this event and its query pool</param>
+        /// <param name="type">Query target of the counter</param>
+        public CounterQueueEvent(CounterQueue queue, QueryTarget type)
+        {
+            _queue = queue;
+
+            _counter = queue.GetQueryObject();
+            Type = type;
+
+            _counter.Begin();
+        }
+
+        /// <summary>
+        /// Restarts the query and marks the event so that the accumulated counter is cleared
+        /// when the event is consumed.
+        /// </summary>
+        internal void Clear()
+        {
+            _counter.Reset();
+            ClearCounter = true;
+        }
+
+        /// <summary>
+        /// Ends the query so that its result can be read back.
+        /// </summary>
+        internal void Complete()
+        {
+            _counter.End();
+        }
+
+        /// <summary>
+        /// Reads the query result, adds it to the accumulated counter, notifies
+        /// <see cref="OnResult"/> and returns the query object to the pool.
+        /// </summary>
+        /// <param name="result">Accumulated counter to update</param>
+        /// <param name="block">True to wait for the result, false to give up if it is not available yet</param>
+        /// <param name="wakeSignal">Optional signal forwarded to the blocking wait</param>
+        /// <returns>
+        /// True if the event is consumed (or was already disposed), false if the result was not
+        /// available in non-blocking mode.
+        /// </returns>
+        internal bool TryConsume(ref ulong result, bool block, AutoResetEvent wakeSignal = null)
+        {
+            lock (_lock)
+            {
+                if (Disposed)
+                {
+                    return true;
+                }
+
+                if (ClearCounter || Type == QueryTarget.Timestamp)
+                {
+                    result = 0;
+                }
+
+                long queryResult;
+
+                if (block)
+                {
+                    queryResult = _counter.AwaitResult(wakeSignal);
+                }
+                else if (!_counter.TryGetResult(out queryResult))
+                {
+                    return false;
+                }
+
+                // A blocking wait that gives up hands back its negative "no result" sentinel.
+                // Adding that as an unsigned value would wrap the accumulator, so skip it.
+                if (queryResult >= 0)
+                {
+                    result += (ulong)queryResult;
+                }
+
+                OnResult?.Invoke(this, result);
+
+                Dispose(); // Return our resources to the pool.
+
+                return true;
+            }
+        }
+
+        /// <summary>
+        /// Asks the owning queue to consume all events up to and including this one.
+        /// Does nothing if the event has already been disposed.
+        /// </summary>
+        public void Flush()
+        {
+            if (Disposed)
+            {
+                return;
+            }
+
+            // Tell the queue to process all events up to this one.
+            _queue.FlushTo(this);
+        }
+
+        /// <summary>
+        /// Marks the event as disposed and returns its query object to the pool.
+        /// Safe to call more than once: the query is only returned the first time.
+        /// </summary>
+        public void Dispose()
+        {
+            Disposed = true;
+
+            // Returning the same query twice would put it in the pool twice, and two later
+            // events would then share it. The exchange lets exactly one caller through without
+            // taking a lock here, so no lock ordering with the queue is introduced.
+            if (Interlocked.Exchange(ref _queryReturned, 1) == 0)
+            {
+                _queue.ReturnQueryObject(_counter);
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.OpenGL/Queries/Counters.cs b/Ryujinx.Graphics.OpenGL/Queries/Counters.cs
new file mode 100644
index 0000000000..7560fb176b
--- /dev/null
+++ b/Ryujinx.Graphics.OpenGL/Queries/Counters.cs
@@ -0,0 +1,57 @@
+using Ryujinx.Graphics.GAL;
+using System;
+
+namespace Ryujinx.Graphics.OpenGL.Queries
+{
+    /// <summary>
+    /// Holds one <see cref="CounterQueue"/> per <see cref="CounterType"/> and routes report,
+    /// reset and flush requests to the queue of the requested type.
+    /// </summary>
+    class Counters : IDisposable
+    {
+        // Indexed by the integer value of CounterType. Slots stay null until Initialize is called.
+        private readonly CounterQueue[] _counterQueues;
+
+        /// <summary>
+        /// Allocates one (still empty) slot per name declared in <see cref="CounterType"/>.
+        /// </summary>
+        public Counters()
+        {
+            int count = Enum.GetNames(typeof(CounterType)).Length;
+
+            _counterQueues = new CounterQueue[count];
+        }
+
+        /// <summary>
+        /// Creates the counter queue for every slot.
+        /// </summary>
+        public void Initialize()
+        {
+            for (int index = 0; index < _counterQueues.Length; index++)
+            {
+                // NOTE(review): assumes CounterType values are contiguous starting at 0 - confirm.
+                CounterType type = (CounterType)index;
+                _counterQueues[index] = new CounterQueue(type);
+            }
+        }
+
+        /// <summary>
+        /// Queues a report on the queue of the given counter type.
+        /// </summary>
+        /// <param name="type">Counter type to report</param>
+        /// <param name="resultHandler">Handler attached to the queued event's result notification</param>
+        /// <returns>The queued event</returns>
+        public CounterQueueEvent QueueReport(CounterType type, EventHandler<ulong> resultHandler)
+        {
+            return _counterQueues[(int)type].QueueReport(resultHandler);
+        }
+
+        /// <summary>
+        /// Queues a reset on the queue of the given counter type.
+        /// </summary>
+        /// <param name="type">Counter type to reset</param>
+        public void QueueReset(CounterType type)
+        {
+            _counterQueues[(int)type].QueueReset();
+        }
+
+        /// <summary>
+        /// Requests a non-blocking flush of every queue.
+        /// </summary>
+        public void Update()
+        {
+            foreach (var queue in _counterQueues)
+            {
+                queue.Flush(false);
+            }
+        }
+
+        /// <summary>
+        /// Performs a blocking flush of the queue of the given counter type.
+        /// </summary>
+        /// <param name="type">Counter type to flush</param>
+        public void Flush(CounterType type)
+        {
+            _counterQueues[(int)type].Flush(true);
+        }
+
+        /// <summary>
+        /// Disposes every queue that has been created.
+        /// </summary>
+        public void Dispose()
+        {
+            foreach (var queue in _counterQueues)
+            {
+                // Slots are null if Initialize was never called; disposing must not throw then.
+                queue?.Dispose();
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs
index 3f7183458b..b3ae8c33d6 100644
--- a/Ryujinx.Graphics.OpenGL/Renderer.cs
+++ b/Ryujinx.Graphics.OpenGL/Renderer.cs
@@ -1,7 +1,9 @@
 using OpenTK.Graphics.OpenGL;
 using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.OpenGL.Queries;
 using Ryujinx.Graphics.Shader;
+using System;
 
 namespace Ryujinx.Graphics.OpenGL
 {
@@ -66,9 +68,14 @@ namespace Ryujinx.Graphics.OpenGL
                 HwCapabilities.MaxSupportedAnisotropy);
         }
 
-        public ulong GetCounter(CounterType type)
+        public void UpdateCounters()
         {
-            return _counters.GetCounter(type);
+            _counters.Update();
+        }
+
+        public ICounterEvent ReportCounter(CounterType type, EventHandler<ulong> resultHandler)
+        {
+            return _counters.QueueReport(type, resultHandler);
         }
 
         public void Initialize()
@@ -89,7 +96,7 @@ namespace Ryujinx.Graphics.OpenGL
 
         public void ResetCounter(CounterType type)
         {
-            _counters.ResetCounter(type);
+            _counters.QueueReset(type);
         }
 
         public void Dispose()
@@ -97,6 +104,7 @@ namespace Ryujinx.Graphics.OpenGL
             TextureCopy.Dispose();
             _pipeline.Dispose();
             _window.Dispose();
+            _counters.Dispose();
         }
     }
 }