Implement VIC BGRA output surface format (#1430)

This commit is contained in:
gdkchan 2020-07-28 18:25:58 -03:00 committed by GitHub
parent ca0d1f8205
commit 938c06c652
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -17,6 +17,9 @@ namespace Ryujinx.Graphics.Vic.Image
case PixelFormat.A8B8G8R8: case PixelFormat.A8B8G8R8:
WriteA8B8G8R8(rm, input, ref config, ref offsets); WriteA8B8G8R8(rm, input, ref config, ref offsets);
break; break;
case PixelFormat.A8R8G8B8:
WriteA8R8G8B8(rm, input, ref config, ref offsets);
break;
case PixelFormat.Y8___V8U8_N420: case PixelFormat.Y8___V8U8_N420:
WriteNv12(rm, input, ref config, ref offsets); WriteNv12(rm, input, ref config, ref offsets);
break; break;
@ -116,6 +119,105 @@ namespace Ryujinx.Graphics.Vic.Image
rm.BufferPool.Return(dstIndex); rm.BufferPool.Return(dstIndex);
} }
/// <summary>
/// Converts <paramref name="input"/> into 4-byte pixels laid out as B, G, R, A and hands the
/// converted image to WriteBuffer, targeting <c>offsets.LumaOffset</c>.
/// </summary>
/// <param name="rm">Supplies the buffer pool used for the staging buffer; also forwarded to WriteBuffer</param>
/// <param name="input">Surface whose pixels are converted</param>
/// <param name="config">Output surface configuration; only OutBlkKind and OutBlkHeight are read here</param>
/// <param name="offsets">Output plane offsets; only LumaOffset is read here</param>
private unsafe static void WriteA8R8G8B8(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets)
{
    const int BytesPerPixel = 4;

    int width = input.Width;
    int height = input.Height;
    int pitch = GetPitch(width, BytesPerPixel);

    // Staging buffer holding the converted image, one row every "pitch" bytes.
    int rentedIndex = rm.BufferPool.Rent(height * pitch, out Span<byte> dst);

    if (!Ssse3.IsSupported)
    {
        // Scalar path: convert one channel at a time through the surface accessors.
        for (int row = 0; row < height; row++)
        {
            int rowStart = row * pitch;

            for (int col = 0; col < width; col++)
            {
                int pos = rowStart + col * BytesPerPixel;

                dst[pos + 0] = Downsample(input.GetB(col, row));
                dst[pos + 1] = Downsample(input.GetG(col, row));
                dst[pos + 2] = Downsample(input.GetR(col, row));
                dst[pos + 3] = Downsample(input.GetA(col, row));
            }
        }
    }
    else
    {
        // Byte shuffle that exchanges bytes 0 and 2 inside every 4-byte group and leaves
        // bytes 1 and 3 in place.
        // NOTE(review): presumably this turns a packed R, G, B, A pixel into B, G, R, A,
        // which relies on the field order of Pixel -- confirm against its declaration.
        Vector128<byte> swapMask = Vector128.Create(
            (byte)2, (byte)1, (byte)0, (byte)3,
            (byte)6, (byte)5, (byte)4, (byte)7,
            (byte)10, (byte)9, (byte)8, (byte)11,
            (byte)14, (byte)13, (byte)12, (byte)15);

        // The vector loop consumes 8 pixels per iteration, so it covers the width rounded
        // down to a multiple of 8; the remaining pixels of each row are handled one by one.
        int simdWidth = width & ~7;

        // Bytes between the end of a row's pixel data and the start of the next row.
        // They are skipped, not written.
        int rowPadding = pitch - width * BytesPerPixel;

        fixed (Pixel* srcBase = input.Data)
        fixed (byte* dstBase = dst)
        {
            Pixel* srcRow = srcBase;
            byte* outPtr = dstBase;

            for (int row = 0; row < height; row++)
            {
                int col = 0;

                while (col < simdWidth)
                {
                    Pixel* group = srcRow + (uint)col;

                    // Each 128-bit load is advanced by 2 pixels from the previous one
                    // (presumably Pixel is four 16-bit channels, i.e. 2 pixels per vector).
                    // Every 16-bit lane is shifted right by 2 bits before being narrowed.
                    Vector128<ushort> pair0 = Sse2.ShiftRightLogical(Sse2.LoadVector128((ushort*)(group + 0)), 2);
                    Vector128<ushort> pair1 = Sse2.ShiftRightLogical(Sse2.LoadVector128((ushort*)(group + 2)), 2);
                    Vector128<ushort> pair2 = Sse2.ShiftRightLogical(Sse2.LoadVector128((ushort*)(group + 4)), 2);
                    Vector128<ushort> pair3 = Sse2.ShiftRightLogical(Sse2.LoadVector128((ushort*)(group + 6)), 2);

                    // Narrow the 16-bit lanes to bytes (with unsigned saturation), then reorder channels.
                    Vector128<byte> firstHalf = Ssse3.Shuffle(
                        Sse2.PackUnsignedSaturate(pair0.AsInt16(), pair1.AsInt16()),
                        swapMask);
                    Vector128<byte> secondHalf = Ssse3.Shuffle(
                        Sse2.PackUnsignedSaturate(pair2.AsInt16(), pair3.AsInt16()),
                        swapMask);

                    Sse2.Store(outPtr + 0x00, firstHalf);
                    Sse2.Store(outPtr + 0x10, secondHalf);

                    outPtr += 0x20;
                    col += 8;
                }

                // Leftover pixels of the row that do not fill a whole group of 8.
                for (; col < width; col++)
                {
                    Pixel* px = srcRow + (uint)col;

                    outPtr[0] = Downsample(px->B);
                    outPtr[1] = Downsample(px->G);
                    outPtr[2] = Downsample(px->R);
                    outPtr[3] = Downsample(px->A);

                    outPtr += BytesPerPixel;
                }

                outPtr += rowPadding;
                srcRow += input.Width;
            }
        }
    }

    bool outLinear = config.OutBlkKind == 0;
    int gobBlocksInY = 1 << config.OutBlkHeight;

    WriteBuffer(rm, dst, offsets.LumaOffset, outLinear, width, height, BytesPerPixel, gobBlocksInY);

    rm.BufferPool.Return(rentedIndex);
}
private unsafe static void WriteNv12(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets) private unsafe static void WriteNv12(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets)
{ {
int gobBlocksInY = 1 << config.OutBlkHeight; int gobBlocksInY = 1 << config.OutBlkHeight;