RyuKen/Ryujinx.Graphics/VDec/VideoDecoder.cs
Thomas Guillemard 884b4e5fd3 Initial non 2D textures support (#525)
* Initial non 2D textures support

- Shaders still need to be changed
- Some types aren't yet implemented

* Start implementing texture instructions suffixes

Fix wrong texture type with cube and TEXS

Also support array textures in TEX and TEX.B

Clean up TEX and TEXS coords managment

Fix TEXS.LL with non-2d textures

Implement TEX.AOFFI

Get the right arguments for TEX, TEXS and TLDS

Also, store suffix operands in appropriate values to support multiple
suffix combinaisons

* Support depth in read/writeTexture

Also support WrapR and detect mipmap

* Proper cube map textures support + fix TEXS.LZ

* Implement depth compare

* some code clean up

* Implement CubeMap textures in OGLTexture.Create

* Implement TLD4 and TLD4S

* Add Texture 1D support

* updates comments

* fix some code style issues

* Fix some nits + rename some things to be less confusing

* Remove GetSuffix local functions

* AOFFI => AOffI

* TextureType => GalTextureTarget

* finish renaming TextureType to TextureTarget

* Disable LL, LZ and LB support in the decompiler

This needs more work at the GL level (GLSL implementation should be
right)

* Revert "Disable LL, LZ and LB support in the decompiler"

This reverts commit 64536c3d9f673645faff3152838d1413c3203395.

* Fix TEXS ARRAY_2D index

* ImageFormat depth should be 1 for all image format

* Fix shader build issues with sampler1DShadow and texture

* Fix DC & AOFFI combinaison with TEX/TEXS

* Support AOFFI with TLD4 and TLD4S

* Fix shader compilation error for TLD4.AOFFI with no DC

* Fix binding isuses on the 2d copy engine

TODO: support 2d array copy

* Support 2D array copy operation in the 2D engine

This make every copy right in the GPU side.
Thie CPU copy probably needs to be updated

* Implement GetGpuSize + fix somes issues with 2d engine copies

TODO: mipmap level in it

* Don't throw an exception in the layer handling

* Fix because of rebase

* Reject 2d layers of non textures in 2d copy engine

* Add 3D textures and mipmap support on BlockLinearSwizzle

* Fix naming on new BitUtils methods

* gpu cache: Make sure to invalidate textures that doesn't have the same target

* Add the concept of layer count for array instead of using depth

Also cleanup GetGpuSize as Swizzle can compute the size with mipmap

* Support multi layer with mip map in ReadTexture

* Add more check for cache invalidation & remove cubemap and cubemap array code for now

Also fix compressed 2d array

* Fix texelFetchOffset shader build error

* Start looking into cube map again

Also add some way to log write in register in engines

* fix write register log levles

* Remove debug logs in WriteRegister

* Disable AOFFI support on non NVIDIA drivers

* Fix code align
2019-02-28 12:12:24 +11:00

281 lines
No EOL
12 KiB
C#

using ChocolArm64.Memory;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
using Ryujinx.Graphics.Vic;
using System;
namespace Ryujinx.Graphics.VDec
{
unsafe class VideoDecoder
{
private NvGpu Gpu;
private H264Decoder H264Decoder;
private Vp9Decoder Vp9Decoder;
private VideoCodec CurrentVideoCodec;
private long DecoderContextAddress;
private long FrameDataAddress;
private long VpxCurrLumaAddress;
private long VpxRef0LumaAddress;
private long VpxRef1LumaAddress;
private long VpxRef2LumaAddress;
private long VpxCurrChromaAddress;
private long VpxRef0ChromaAddress;
private long VpxRef1ChromaAddress;
private long VpxRef2ChromaAddress;
private long VpxProbTablesAddress;
public VideoDecoder(NvGpu Gpu)
{
this.Gpu = Gpu;
H264Decoder = new H264Decoder();
Vp9Decoder = new Vp9Decoder();
}
public void Process(NvGpuVmm Vmm, int MethodOffset, int[] Arguments)
{
VideoDecoderMeth Method = (VideoDecoderMeth)MethodOffset;
switch (Method)
{
case VideoDecoderMeth.SetVideoCodec: SetVideoCodec (Vmm, Arguments); break;
case VideoDecoderMeth.Execute: Execute (Vmm, Arguments); break;
case VideoDecoderMeth.SetDecoderCtxAddr: SetDecoderCtxAddr (Vmm, Arguments); break;
case VideoDecoderMeth.SetFrameDataAddr: SetFrameDataAddr (Vmm, Arguments); break;
case VideoDecoderMeth.SetVpxCurrLumaAddr: SetVpxCurrLumaAddr (Vmm, Arguments); break;
case VideoDecoderMeth.SetVpxRef0LumaAddr: SetVpxRef0LumaAddr (Vmm, Arguments); break;
case VideoDecoderMeth.SetVpxRef1LumaAddr: SetVpxRef1LumaAddr (Vmm, Arguments); break;
case VideoDecoderMeth.SetVpxRef2LumaAddr: SetVpxRef2LumaAddr (Vmm, Arguments); break;
case VideoDecoderMeth.SetVpxCurrChromaAddr: SetVpxCurrChromaAddr(Vmm, Arguments); break;
case VideoDecoderMeth.SetVpxRef0ChromaAddr: SetVpxRef0ChromaAddr(Vmm, Arguments); break;
case VideoDecoderMeth.SetVpxRef1ChromaAddr: SetVpxRef1ChromaAddr(Vmm, Arguments); break;
case VideoDecoderMeth.SetVpxRef2ChromaAddr: SetVpxRef2ChromaAddr(Vmm, Arguments); break;
case VideoDecoderMeth.SetVpxProbTablesAddr: SetVpxProbTablesAddr(Vmm, Arguments); break;
}
}
private void SetVideoCodec(NvGpuVmm Vmm, int[] Arguments)
{
CurrentVideoCodec = (VideoCodec)Arguments[0];
}
private void Execute(NvGpuVmm Vmm, int[] Arguments)
{
if (CurrentVideoCodec == VideoCodec.H264)
{
int FrameDataSize = Vmm.ReadInt32(DecoderContextAddress + 0x48);
H264ParameterSets Params = MemoryHelper.Read<H264ParameterSets>(Vmm.Memory, Vmm.GetPhysicalAddress(DecoderContextAddress + 0x58));
H264Matrices Matrices = new H264Matrices()
{
ScalingMatrix4 = Vmm.ReadBytes(DecoderContextAddress + 0x1c0, 6 * 16),
ScalingMatrix8 = Vmm.ReadBytes(DecoderContextAddress + 0x220, 2 * 64)
};
byte[] FrameData = Vmm.ReadBytes(FrameDataAddress, FrameDataSize);
H264Decoder.Decode(Params, Matrices, FrameData);
}
else if (CurrentVideoCodec == VideoCodec.Vp9)
{
int FrameDataSize = Vmm.ReadInt32(DecoderContextAddress + 0x30);
Vp9FrameKeys Keys = new Vp9FrameKeys()
{
CurrKey = Vmm.GetPhysicalAddress(VpxCurrLumaAddress),
Ref0Key = Vmm.GetPhysicalAddress(VpxRef0LumaAddress),
Ref1Key = Vmm.GetPhysicalAddress(VpxRef1LumaAddress),
Ref2Key = Vmm.GetPhysicalAddress(VpxRef2LumaAddress)
};
Vp9FrameHeader Header = MemoryHelper.Read<Vp9FrameHeader>(Vmm.Memory, Vmm.GetPhysicalAddress(DecoderContextAddress + 0x48));
Vp9ProbabilityTables Probs = new Vp9ProbabilityTables()
{
SegmentationTreeProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x387, 0x7),
SegmentationPredProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x38e, 0x3),
Tx8x8Probs = Vmm.ReadBytes(VpxProbTablesAddress + 0x470, 0x2),
Tx16x16Probs = Vmm.ReadBytes(VpxProbTablesAddress + 0x472, 0x4),
Tx32x32Probs = Vmm.ReadBytes(VpxProbTablesAddress + 0x476, 0x6),
CoefProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x5a0, 0x900),
SkipProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x537, 0x3),
InterModeProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x400, 0x1c),
InterpFilterProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x52a, 0x8),
IsInterProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x41c, 0x4),
CompModeProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x532, 0x5),
SingleRefProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x580, 0xa),
CompRefProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x58a, 0x5),
YModeProbs0 = Vmm.ReadBytes(VpxProbTablesAddress + 0x480, 0x20),
YModeProbs1 = Vmm.ReadBytes(VpxProbTablesAddress + 0x47c, 0x4),
PartitionProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x4e0, 0x40),
MvJointProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x53b, 0x3),
MvSignProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x53e, 0x3),
MvClassProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x54c, 0x14),
MvClass0BitProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x540, 0x3),
MvBitsProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x56c, 0x14),
MvClass0FrProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x560, 0xc),
MvFrProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x542, 0x6),
MvClass0HpProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x548, 0x2),
MvHpProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x54a, 0x2)
};
byte[] FrameData = Vmm.ReadBytes(FrameDataAddress, FrameDataSize);
Vp9Decoder.Decode(Keys, Header, Probs, FrameData);
}
else
{
ThrowUnimplementedCodec();
}
}
private void SetDecoderCtxAddr(NvGpuVmm Vmm, int[] Arguments)
{
DecoderContextAddress = GetAddress(Arguments);
}
private void SetFrameDataAddr(NvGpuVmm Vmm, int[] Arguments)
{
FrameDataAddress = GetAddress(Arguments);
}
private void SetVpxCurrLumaAddr(NvGpuVmm Vmm, int[] Arguments)
{
VpxCurrLumaAddress = GetAddress(Arguments);
}
private void SetVpxRef0LumaAddr(NvGpuVmm Vmm, int[] Arguments)
{
VpxRef0LumaAddress = GetAddress(Arguments);
}
private void SetVpxRef1LumaAddr(NvGpuVmm Vmm, int[] Arguments)
{
VpxRef1LumaAddress = GetAddress(Arguments);
}
private void SetVpxRef2LumaAddr(NvGpuVmm Vmm, int[] Arguments)
{
VpxRef2LumaAddress = GetAddress(Arguments);
}
private void SetVpxCurrChromaAddr(NvGpuVmm Vmm, int[] Arguments)
{
VpxCurrChromaAddress = GetAddress(Arguments);
}
private void SetVpxRef0ChromaAddr(NvGpuVmm Vmm, int[] Arguments)
{
VpxRef0ChromaAddress = GetAddress(Arguments);
}
private void SetVpxRef1ChromaAddr(NvGpuVmm Vmm, int[] Arguments)
{
VpxRef1ChromaAddress = GetAddress(Arguments);
}
private void SetVpxRef2ChromaAddr(NvGpuVmm Vmm, int[] Arguments)
{
VpxRef2ChromaAddress = GetAddress(Arguments);
}
private void SetVpxProbTablesAddr(NvGpuVmm Vmm, int[] Arguments)
{
VpxProbTablesAddress = GetAddress(Arguments);
}
private static long GetAddress(int[] Arguments)
{
return (long)(uint)Arguments[0] << 8;
}
internal void CopyPlanes(NvGpuVmm Vmm, SurfaceOutputConfig OutputConfig)
{
switch (OutputConfig.PixelFormat)
{
case SurfacePixelFormat.RGBA8: CopyPlanesRgba8 (Vmm, OutputConfig); break;
case SurfacePixelFormat.YUV420P: CopyPlanesYuv420p(Vmm, OutputConfig); break;
default: ThrowUnimplementedPixelFormat(OutputConfig.PixelFormat); break;
}
}
private void CopyPlanesRgba8(NvGpuVmm Vmm, SurfaceOutputConfig OutputConfig)
{
FFmpegFrame Frame = FFmpegWrapper.GetFrameRgba();
if ((Frame.Width | Frame.Height) == 0)
{
return;
}
GalImage Image = new GalImage(
OutputConfig.SurfaceWidth,
OutputConfig.SurfaceHeight, 1, 1, 1,
OutputConfig.GobBlockHeight, 1,
GalMemoryLayout.BlockLinear,
GalImageFormat.RGBA8 | GalImageFormat.Unorm,
GalTextureTarget.TwoD);
ImageUtils.WriteTexture(Vmm, Image, Vmm.GetPhysicalAddress(OutputConfig.SurfaceLumaAddress), Frame.Data);
}
private void CopyPlanesYuv420p(NvGpuVmm Vmm, SurfaceOutputConfig OutputConfig)
{
FFmpegFrame Frame = FFmpegWrapper.GetFrame();
if ((Frame.Width | Frame.Height) == 0)
{
return;
}
int HalfSrcWidth = Frame.Width / 2;
int HalfWidth = Frame.Width / 2;
int HalfHeight = Frame.Height / 2;
int AlignedWidth = (OutputConfig.SurfaceWidth + 0xff) & ~0xff;
for (int Y = 0; Y < Frame.Height; Y++)
{
int Src = Y * Frame.Width;
int Dst = Y * AlignedWidth;
int Size = Frame.Width;
for (int Offset = 0; Offset < Size; Offset++)
{
Vmm.WriteByte(OutputConfig.SurfaceLumaAddress + Dst + Offset, *(Frame.LumaPtr + Src + Offset));
}
}
//Copy chroma data from both channels with interleaving.
for (int Y = 0; Y < HalfHeight; Y++)
{
int Src = Y * HalfSrcWidth;
int Dst = Y * AlignedWidth;
for (int X = 0; X < HalfWidth; X++)
{
Vmm.WriteByte(OutputConfig.SurfaceChromaUAddress + Dst + X * 2 + 0, *(Frame.ChromaBPtr + Src + X));
Vmm.WriteByte(OutputConfig.SurfaceChromaUAddress + Dst + X * 2 + 1, *(Frame.ChromaRPtr + Src + X));
}
}
}
private void ThrowUnimplementedCodec()
{
throw new NotImplementedException("Codec \"" + CurrentVideoCodec + "\" is not supported!");
}
private void ThrowUnimplementedPixelFormat(SurfacePixelFormat PixelFormat)
{
throw new NotImplementedException("Pixel format \"" + PixelFormat + "\" is not supported!");
}
}
}