diff --git a/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs b/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
index be7065563b..d2b6bec377 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
@@ -1,11 +1,15 @@
-using Ryujinx.Graphics.Device;
+using Ryujinx.Common;
+using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Texture;
+using Ryujinx.Memory;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
namespace Ryujinx.Graphics.Gpu.Engine.Twod
{
@@ -44,6 +48,180 @@ namespace Ryujinx.Graphics.Gpu.Engine.Twod
/// Data to be written
public void Write(int offset, int data) => _state.Write(offset, data);
+        /// <summary>
+        /// Checks whether raw data can be moved between the source and destination texture as-is.
+        /// Both textures must agree on bytes per pixel, height, depth and layout; linear textures
+        /// are additionally compared by stride, all other textures by width.
+        /// </summary>
+        /// <param name="lhs">Info for the first texture</param>
+        /// <param name="rhs">Info for the second texture</param>
+        /// <param name="lhsFormat">Format of the first texture</param>
+        /// <param name="rhsFormat">Format of the second texture</param>
+        /// <returns>True if the data is compatible, false otherwise</returns>
+        private bool IsDataCompatible(TwodTexture lhs, TwodTexture rhs, FormatInfo lhsFormat, FormatInfo rhsFormat)
+        {
+            bool sameShape =
+                lhsFormat.BytesPerPixel == rhsFormat.BytesPerPixel &&
+                lhs.Height == rhs.Height &&
+                lhs.Depth == rhs.Depth &&
+                lhs.LinearLayout == rhs.LinearLayout &&
+                lhs.MemoryLayout.Packed == rhs.MemoryLayout.Packed;
+
+            if (!sameShape)
+            {
+                return false;
+            }
+
+            // The layout flag matches on both sides at this point, so checking one side is enough
+            // to pick which row dimension has to be equal.
+            return lhs.LinearLayout
+                ? lhs.Stride == rhs.Stride
+                : lhs.Width == rhs.Width;
+        }
+
+        /// <summary>
+        /// Determines whether a copy region spans the whole texture, taking width alignment into account.
+        /// The region must start at the origin, end at the texture height, and its end x coordinate,
+        /// once aligned up, must equal the texture width in pixels.
+        /// </summary>
+        /// <param name="texture">The texture to check</param>
+        /// <param name="formatInfo">Format of the texture, used for its bytes per pixel</param>
+        /// <param name="x1">Region start x</param>
+        /// <param name="y1">Region start y</param>
+        /// <param name="x2">Region end x</param>
+        /// <param name="y2">Region end y</param>
+        /// <returns>True if the region covers the full texture, false otherwise</returns>
+        private bool IsCopyRegionComplete(TwodTexture texture, FormatInfo formatInfo, int x1, int y1, int x2, int y2)
+        {
+            bool startsAtOrigin = x1 == 0 && y1 == 0;
+
+            if (!startsAtOrigin || y2 != texture.Height)
+            {
+                return false;
+            }
+
+            bool isLinear = texture.LinearLayout;
+            int bytesPerPixel = formatInfo.BytesPerPixel;
+
+            // Linear textures derive their pixel width from the stride and need no alignment;
+            // other textures use the declared width, aligned to GobAlignment bytes expressed in pixels.
+            int fullWidth = isLinear ? texture.Stride / bytesPerPixel : texture.Width;
+            int alignment = isLinear ? 1 : Constants.GobAlignment / bytesPerPixel;
+
+            return BitUtils.AlignUp(x2, alignment) == fullWidth;
+        }
+
+        /// <summary>
+        /// Performs a full data copy between two textures, reading and writing guest memory directly.
+        /// The textures must have a matching layout, size, and bytes per pixel.
+        /// </summary>
+        /// <param name="src">The source texture</param>
+        /// <param name="dst">The destination texture</param>
+        /// <param name="w">Copy width</param>
+        /// <param name="h">Copy height</param>
+        /// <param name="bpp">Bytes per pixel</param>
+        private void UnscaledFullCopy(TwodTexture src, TwodTexture dst, int w, int h, int bpp)
+        {
+            var srcCalculator = new OffsetCalculator(
+                w,
+                h,
+                src.Stride,
+                src.LinearLayout,
+                src.MemoryLayout.UnpackGobBlocksInY(),
+                src.MemoryLayout.UnpackGobBlocksInZ(),
+                bpp);
+
+            // Size in bytes of the memory range covered by the w x h copy rectangle.
+            (int _, int srcSize) = srcCalculator.GetRectangleRange(0, 0, w, h);
+
+            var memoryManager = _channel.MemoryManager;
+
+            ulong srcGpuVa = src.Address.Pack();
+            ulong dstGpuVa = dst.Address.Pack();
+
+            ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa, srcSize, true);
+
+            int width;
+            int height = src.Height;
+            if (src.LinearLayout)
+            {
+                width = src.Stride / bpp;
+            }
+            else
+            {
+                width = src.Width;
+            }
+
+            // If the copy is not equal to the width and height of the texture, we will need to copy partially.
+            // It's worth noting that it has already been established that the src and dst are the same size.
+
+            if (w == width && h == height)
+            {
+                memoryManager.Write(dstGpuVa, srcSpan);
+            }
+            else
+            {
+                using WritableRegion dstRegion = memoryManager.GetWritableRegion(dstGpuVa, srcSize, true);
+                Span<byte> dstSpan = dstRegion.Memory.Span;
+
+                if (src.LinearLayout)
+                {
+                    // Copy only the w x h rectangle. Both spans are srcSize bytes long, a size that was
+                    // computed for that rectangle, so copying full-width rows for the full texture
+                    // height could slice past the end of the spans.
+                    int stride = src.Stride;
+                    int offset = 0;
+                    int lineSize = w * bpp;
+
+                    for (int y = 0; y < h; y++)
+                    {
+                        srcSpan.Slice(offset, lineSize).CopyTo(dstSpan.Slice(offset));
+
+                        offset += stride;
+                    }
+                }
+                else
+                {
+                    // Copy with the block linear layout in mind.
+                    // Recreate the offset calculator with bpp 1 for the copy, so x is a byte position.
+
+                    int stride = w * bpp;
+
+                    srcCalculator = new OffsetCalculator(
+                        stride,
+                        h,
+                        0,
+                        false,
+                        src.MemoryLayout.UnpackGobBlocksInY(),
+                        src.MemoryLayout.UnpackGobBlocksInZ(),
+                        1);
+
+                    // Largest multiple of 16 bytes in a row; that part is copied one 16-byte vector at a time.
+                    int strideTrunc = BitUtils.AlignDown(stride, 16);
+
+                    ReadOnlySpan<Vector128<byte>> srcVec = MemoryMarshal.Cast<byte, Vector128<byte>>(srcSpan);
+                    Span<Vector128<byte>> dstVec = MemoryMarshal.Cast<byte, Vector128<byte>>(dstSpan);
+
+                    for (int y = 0; y < h; y++)
+                    {
+                        int x = 0;
+
+                        srcCalculator.SetY(y);
+
+                        for (; x < strideTrunc; x += 16)
+                        {
+                            // Byte offset converted to an index into the 16-byte vector spans.
+                            int offset = srcCalculator.GetOffset(x) >> 4;
+
+                            dstVec[offset] = srcVec[offset];
+                        }
+
+                        // Remaining bytes of the row that do not fill a whole vector.
+                        for (; x < stride; x++)
+                        {
+                            int offset = srcCalculator.GetOffset(x);
+
+                            dstSpan[offset] = srcSpan[offset];
+                        }
+                    }
+                }
+            }
+        }
+
///
/// Performs the blit operation, triggered by the register write.
///
@@ -114,16 +292,31 @@ namespace Ryujinx.Graphics.Gpu.Engine.Twod
srcX1 = 0;
}
+ FormatInfo dstCopyTextureFormat = dstCopyTexture.Format.Convert();
+
+ bool canDirectCopy = GraphicsConfig.Fast2DCopy &&
+ srcX2 == dstX2 && srcY2 == dstY2 &&
+ IsDataCompatible(srcCopyTexture, dstCopyTexture, srcCopyTextureFormat, dstCopyTextureFormat) &&
+ IsCopyRegionComplete(srcCopyTexture, srcCopyTextureFormat, srcX1, srcY1, srcX2, srcY2) &&
+ IsCopyRegionComplete(dstCopyTexture, dstCopyTextureFormat, dstX1, dstY1, dstX2, dstY2);
+
var srcTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
memoryManager,
srcCopyTexture,
offset,
srcCopyTextureFormat,
+ !canDirectCopy,
false,
srcHint);
if (srcTexture == null)
{
+ if (canDirectCopy)
+ {
+ // Directly copy the data on CPU.
+ UnscaledFullCopy(srcCopyTexture, dstCopyTexture, srcX2, srcY2, srcCopyTextureFormat.BytesPerPixel);
+ }
+
return;
}
@@ -132,7 +325,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Twod
// When the source texture that was found has a depth format,
// we must enforce the target texture also has a depth format,
// as copies between depth and color formats are not allowed.
- FormatInfo dstCopyTextureFormat;
if (srcTexture.Format.IsDepthOrStencil())
{
@@ -148,6 +340,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Twod
dstCopyTexture,
0,
dstCopyTextureFormat,
+ true,
srcTexture.ScaleMode == TextureScaleMode.Scaled,
dstHint);
diff --git a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
index d58b8da790..493dbd7bdb 100644
--- a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
+++ b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
@@ -28,6 +28,14 @@ namespace Ryujinx.Graphics.Gpu
///
public static bool FastGpuTime = true;
+ /// <summary>
+ /// Enables or disables fast 2d engine texture copies entirely on CPU when possible.
+ /// Reduces stuttering and the number of textures in games that copy textures around for streaming,
+ /// as textures will not need to be created for the copy, and the data does not need to be
+ /// flushed from GPU.
+ /// </summary>
+ public static bool Fast2DCopy = true;
+
///
/// Enables or disables the Just-in-Time compiler for GPU Macro code.
///
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
index fed89770ab..203a3a125a 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
@@ -194,6 +194,7 @@ namespace Ryujinx.Graphics.Gpu.Image
TwodTexture copyTexture,
ulong offset,
FormatInfo formatInfo,
+ bool shouldCreate,
bool preferScaling = true,
Size? sizeHint = null)
{
@@ -234,6 +235,11 @@ namespace Ryujinx.Graphics.Gpu.Image
flags |= TextureSearchFlags.WithUpscale;
}
+ if (!shouldCreate)
+ {
+ flags |= TextureSearchFlags.NoCreate;
+ }
+
Texture texture = FindOrCreateTexture(memoryManager, flags, info, 0, sizeHint);
texture?.SynchronizeMemory();
@@ -480,6 +486,10 @@ namespace Ryujinx.Graphics.Gpu.Image
return texture;
}
+ else if (flags.HasFlag(TextureSearchFlags.NoCreate))
+ {
+ return null;
+ }
// Calculate texture sizes, used to find all overlapping textures.
SizeInfo sizeInfo = info.CalculateSizeInfo(layerSize);
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs b/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs
index 45e55c02d9..aea7b167e6 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs
@@ -12,6 +12,7 @@ namespace Ryujinx.Graphics.Gpu.Image
Strict = 1 << 0,
ForSampler = 1 << 1,
ForCopy = 1 << 2,
- WithUpscale = 1 << 3
+ WithUpscale = 1 << 3,
+ NoCreate = 1 << 4
}
}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
index 3968cb96ed..b6395e73f6 100644
--- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
@@ -154,14 +154,15 @@ namespace Ryujinx.Graphics.Gpu.Memory
///
/// Gets a writable region from GPU mapped memory.
///
- /// Start address of the range
+ /// <param name="va">Start address of the range</param>
/// Size in bytes to be range
+ /// <param name="tracked">True if write tracking is triggered on the span</param>
/// A writable region with the data at the specified memory location
- public WritableRegion GetWritableRegion(ulong va, int size)
+ public WritableRegion GetWritableRegion(ulong va, int size, bool tracked = false)
{
if (IsContiguous(va, size))
{
- return Physical.GetWritableRegion(Translate(va), size);
+ return Physical.GetWritableRegion(Translate(va), size, tracked);
}
else
{
@@ -169,7 +170,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
GetSpan(va, size).CopyTo(memory.Span);
- return new WritableRegion(this, va, memory);
+ return new WritableRegion(this, va, memory, tracked);
}
}