From ecbf303266d78d7b4287ce4ea97d59107a05fb2f Mon Sep 17 00:00:00 2001
From: riperiperi
Date: Thu, 18 May 2023 07:56:34 +0100
Subject: [PATCH] GPU: Avoid using garbage size for non-cb0 storage buffers
 (#4999)

* GPU: Avoid using garbage size for non-cb0 storage buffers

In the depths area, Tears of the Kingdom uses a global memory access with address on constant buffer slot 6. This isn't standard and thus doesn't actually have a size 8 bytes after it, so we were reading back a garbage size that ended up very large (at least in version 1.1.0), and would synchronize a lot of data per frame.

This PR makes storage buffers created from addresses outside constant buffer slot 0 get their size as the number of bytes remaining in the GPU mapping starting at the given virtual address. This should bound the buffer to a reasonable size, and ideally stop it crossing into other memory.

* Limit max size

* Add TODO

* Feedback
---
 .../Engine/Threed/StateUpdater.cs             | 16 ++++++++++-
 .../Memory/MemoryManager.cs                   | 27 +++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
index 1c9bf1d2ab..87e58ead78 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
@@ -23,6 +23,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
         public const int PrimitiveRestartStateIndex = 12;
         public const int RenderTargetStateIndex = 27;
 
+        private const ulong MaxUnknownStorageSize = 0x100000;
+
         private readonly GpuContext _context;
         private readonly GpuChannel _channel;
         private readonly DeviceStateWithShadow<ThreedClassState> _state;
@@ -356,7 +358,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
 
                     SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
 
-                    _channel.BufferManager.SetGraphicsStorageBuffer(stage, sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
+                    uint size;
+                    if (sb.SbCbSlot == 0)
+                    {
+                        // Only trust the SbDescriptor size if it comes from slot 0.
+                        size = (uint)sbDescriptor.Size;
+                    }
+                    else
+                    {
+                        // TODO: Use full mapped size and somehow speed up buffer sync.
+                        size = (uint)_channel.MemoryManager.GetMappedSize(sbDescriptor.PackAddress(), MaxUnknownStorageSize);
+                    }
+
+                    _channel.BufferManager.SetGraphicsStorageBuffer(stage, sb.Slot, sbDescriptor.PackAddress(), size, sb.Flags);
                 }
             }
         }
diff --git a/src/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/src/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
index 0d4a41f021..c7a138c98c 100644
--- a/src/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
+++ b/src/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
@@ -637,6 +637,33 @@ namespace Ryujinx.Graphics.Gpu.Memory
             return UnpackPaFromPte(pte) + (va & PageMask);
         }
 
+        /// <summary>
+        /// Translates a GPU virtual address and returns the number of bytes that are mapped after it.
+        /// </summary>
+        /// <param name="va">GPU virtual address to be translated</param>
+        /// <param name="maxSize">Maximum size in bytes to scan</param>
+        /// <returns>Number of bytes, 0 if unmapped</returns>
+        public ulong GetMappedSize(ulong va, ulong maxSize)
+        {
+            if (!ValidateAddress(va))
+            {
+                return 0;
+            }
+
+            ulong startVa = va;
+            ulong endVa = va + maxSize;
+
+            ulong pte = GetPte(va);
+
+            while (pte != PteUnmapped && va < endVa)
+            {
+                va += PageSize - (va & PageMask);
+                pte = GetPte(va);
+            }
+
+            return Math.Min(maxSize, va - startVa);
+        }
+
         /// <summary>
         /// Gets the kind of a given memory page.
         /// This might indicate the type of resource that can be allocated on the page, and also texture tiling.