From f906eb06c28880c20160cb4a969e3f6fddb3029b Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Wed, 21 Dec 2022 20:39:58 -0300
Subject: [PATCH] Implement a software ETC2 texture decoder (#4121)

* Implement a software ETC2 texture decoder

* Fix output size calculation for non-2D textures

* Address PR feedback
---
 Ryujinx.Graphics.GAL/Capabilities.cs          |   3 +
 Ryujinx.Graphics.GAL/Format.cs                |  21 +
 Ryujinx.Graphics.Gpu/Image/FormatTable.cs     |   4 +
 Ryujinx.Graphics.Gpu/Image/Texture.cs         |  22 +-
 .../Image/TextureCompatibility.cs             |  32 +-
 Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs     |   1 +
 Ryujinx.Graphics.Texture/ETC2Decoder.cs       | 682 ++++++++++++++++++
 Ryujinx.Graphics.Vulkan/VulkanRenderer.cs     |   8 +
 8 files changed, 763 insertions(+), 10 deletions(-)
 create mode 100644 Ryujinx.Graphics.Texture/ETC2Decoder.cs

diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs
index 2d38ecccfc..bc93908b1b 100644
--- a/Ryujinx.Graphics.GAL/Capabilities.cs
+++ b/Ryujinx.Graphics.GAL/Capabilities.cs
@@ -14,6 +14,7 @@ namespace Ryujinx.Graphics.GAL
         public readonly bool SupportsBc123Compression;
         public readonly bool SupportsBc45Compression;
         public readonly bool SupportsBc67Compression;
+        public readonly bool SupportsEtc2Compression;
         public readonly bool Supports3DTextureCompression;
         public readonly bool SupportsBgraFormat;
         public readonly bool SupportsR4G4Format;
@@ -50,6 +51,7 @@ namespace Ryujinx.Graphics.GAL
             bool supportsBc123Compression,
             bool supportsBc45Compression,
             bool supportsBc67Compression,
+            bool supportsEtc2Compression,
             bool supports3DTextureCompression,
             bool supportsBgraFormat,
             bool supportsR4G4Format,
@@ -83,6 +85,7 @@ namespace Ryujinx.Graphics.GAL
             SupportsBc123Compression = supportsBc123Compression;
             SupportsBc45Compression = supportsBc45Compression;
             SupportsBc67Compression = supportsBc67Compression;
+            SupportsEtc2Compression = supportsEtc2Compression;
             Supports3DTextureCompression = supports3DTextureCompression;
             SupportsBgraFormat = supportsBgraFormat;
             SupportsR4G4Format = supportsR4G4Format;
diff --git a/Ryujinx.Graphics.GAL/Format.cs b/Ryujinx.Graphics.GAL/Format.cs
index 8a50f22d4e..87d08803d6 100644
--- a/Ryujinx.Graphics.GAL/Format.cs
+++ b/Ryujinx.Graphics.GAL/Format.cs
@@ -516,6 +516,27 @@ namespace Ryujinx.Graphics.GAL
             return false;
         }
 
+        /// <summary>
+        /// Checks if the texture format is an ETC2 format.
+        /// </summary>
+        /// <param name="format">Texture format</param>
+        /// <returns>True if the texture format is an ETC2 format, false otherwise</returns>
+        public static bool IsEtc2(this Format format)
+        {
+            switch (format)
+            {
+                case Format.Etc2RgbaSrgb:
+                case Format.Etc2RgbaUnorm:
+                case Format.Etc2RgbPtaSrgb:
+                case Format.Etc2RgbPtaUnorm:
+                case Format.Etc2RgbSrgb:
+                case Format.Etc2RgbUnorm:
+                    return true;
+            }
+
+            return false;
+        }
+
         /// <summary>
         /// Checks if the texture format is a BGR format.
         /// </summary>
diff --git a/Ryujinx.Graphics.Gpu/Image/FormatTable.cs b/Ryujinx.Graphics.Gpu/Image/FormatTable.cs
index c76e1fca0c..7290161041 100644
--- a/Ryujinx.Graphics.Gpu/Image/FormatTable.cs
+++ b/Ryujinx.Graphics.Gpu/Image/FormatTable.cs
@@ -208,8 +208,10 @@ namespace Ryujinx.Graphics.Gpu.Image
             Bc6HSf16Float                    = Bc6HSf16          | RFloat | GFloat | BFloat | AFloat,        // 0x7ff90
             Bc6HUf16Float                    = Bc6HUf16          | RFloat | GFloat | BFloat | AFloat,        // 0x7ff91
             Etc2RgbUnorm                     = Etc2Rgb           | RUnorm | GUnorm | BUnorm | AUnorm,        // 0x24906
+            Etc2RgbPtaUnorm                  = Etc2RgbPta        | RUnorm | GUnorm | BUnorm | AUnorm,        // 0x2490a
             Etc2RgbaUnorm                    = Etc2Rgba          | RUnorm | GUnorm | BUnorm | AUnorm,        // 0x2490b
             Etc2RgbUnormSrgb                 = Etc2Rgb           | RUnorm | GUnorm | BUnorm | AUnorm | Srgb, // 0xa4906
+            Etc2RgbPtaUnormSrgb              = Etc2RgbPta        | RUnorm | GUnorm | BUnorm | AUnorm | Srgb, // 0xa490a
             Etc2RgbaUnormSrgb                = Etc2Rgba          | RUnorm | GUnorm | BUnorm | AUnorm | Srgb, // 0xa490b
             Astc2D4x4Unorm                   = Astc2D4x4         | RUnorm | GUnorm | BUnorm | AUnorm,        // 0x24940
             Astc2D5x4Unorm                   = Astc2D5x4         | RUnorm | GUnorm | BUnorm | AUnorm,        // 0x24950
@@ -429,8 +431,10 @@ namespace Ryujinx.Graphics.Gpu.Image
             { TextureFormat.Bc6HSf16Float,                    new FormatInfo(Format.Bc6HSfloat,        4,  4,  16, 4) },
             { TextureFormat.Bc6HUf16Float,                    new FormatInfo(Format.Bc6HUfloat,        4,  4,  16, 4) },
             { TextureFormat.Etc2RgbUnorm,                     new FormatInfo(Format.Etc2RgbUnorm,      4,  4,  8,  3) },
+            { TextureFormat.Etc2RgbPtaUnorm,                  new FormatInfo(Format.Etc2RgbPtaUnorm,   4,  4,  8,  4) },
             { TextureFormat.Etc2RgbaUnorm,                    new FormatInfo(Format.Etc2RgbaUnorm,     4,  4,  16, 4) },
             { TextureFormat.Etc2RgbUnormSrgb,                 new FormatInfo(Format.Etc2RgbSrgb,       4,  4,  8,  3) },
+            { TextureFormat.Etc2RgbPtaUnormSrgb,              new FormatInfo(Format.Etc2RgbPtaSrgb,    4,  4,  8,  4) },
             { TextureFormat.Etc2RgbaUnormSrgb,                new FormatInfo(Format.Etc2RgbaSrgb,      4,  4,  16, 4) },
             { TextureFormat.Astc2D4x4Unorm,                   new FormatInfo(Format.Astc4x4Unorm,      4,  4,  16, 4) },
             { TextureFormat.Astc2D5x4Unorm,                   new FormatInfo(Format.Astc5x4Unorm,      5,  4,  16, 4) },
diff --git a/Ryujinx.Graphics.Gpu/Image/Texture.cs b/Ryujinx.Graphics.Gpu/Image/Texture.cs
index 4203cb0030..904c908f24 100644
--- a/Ryujinx.Graphics.Gpu/Image/Texture.cs
+++ b/Ryujinx.Graphics.Gpu/Image/Texture.cs
@@ -857,9 +857,23 @@ namespace Ryujinx.Graphics.Gpu.Image
 
                 result = decoded;
             }
-            else if (!_context.Capabilities.SupportsR4G4Format && Format == Format.R4G4Unorm)
+            else if (!_context.Capabilities.SupportsEtc2Compression && Format.IsEtc2())
             {
-                result = PixelConverter.ConvertR4G4ToR4G4B4A4(result);
+                switch (Format)
+                {
+                    case Format.Etc2RgbaSrgb:
+                    case Format.Etc2RgbaUnorm:
+                        result = ETC2Decoder.DecodeRgba(result, width, height, depth, levels, layers);
+                        break;
+                    case Format.Etc2RgbPtaSrgb:
+                    case Format.Etc2RgbPtaUnorm:
+                        result = ETC2Decoder.DecodePta(result, width, height, depth, levels, layers);
+                        break;
+                    case Format.Etc2RgbSrgb:
+                    case Format.Etc2RgbUnorm:
+                        result = ETC2Decoder.DecodeRgb(result, width, height, depth, levels, layers);
+                        break;
+                }
             }
             else if (!TextureCompatibility.HostSupportsBcFormat(Format, Target, _context.Capabilities))
             {
@@ -895,6 +909,10 @@ namespace Ryujinx.Graphics.Gpu.Image
                         break;
                 }
             }
+            else if (!_context.Capabilities.SupportsR4G4Format && Format == Format.R4G4Unorm)
+            {
+                result = PixelConverter.ConvertR4G4ToR4G4B4A4(result);
+            }
 
             return result;
         }
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs b/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs
index 91a1a728d2..642e03b68e 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs
@@ -67,6 +67,10 @@ namespace Ryujinx.Graphics.Gpu.Image
         /// <returns>A host compatible format</returns>
         public static FormatInfo ToHostCompatibleFormat(TextureInfo info, Capabilities caps)
         {
+            // The host API does not support those compressed formats.
+            // We assume software decompression will be done for those textures,
+            // and so we adjust the format here to match the decompressor output.
+
             if (!caps.SupportsAstcCompression)
             {
                 if (info.FormatInfo.Format.IsAstcUnorm())
@@ -83,16 +87,8 @@ namespace Ryujinx.Graphics.Gpu.Image
                 }
             }
 
-            if (!caps.SupportsR4G4Format && info.FormatInfo.Format == Format.R4G4Unorm)
-            {
-                return new FormatInfo(Format.R4G4B4A4Unorm, 1, 1, 2, 4);
-            }
-
             if (!HostSupportsBcFormat(info.FormatInfo.Format, info.Target, caps))
             {
-                // The host API does not this compressed format.
-                // We assume software decompression will be done for those textures,
-                // and so we adjust the format here to match the decompressor output.
                 switch (info.FormatInfo.Format)
                 {
                     case Format.Bc1RgbaSrgb:
@@ -119,6 +115,26 @@ namespace Ryujinx.Graphics.Gpu.Image
                 }
             }
 
+            if (!caps.SupportsEtc2Compression)
+            {
+                switch (info.FormatInfo.Format)
+                {
+                    case Format.Etc2RgbaSrgb:
+                    case Format.Etc2RgbPtaSrgb:
+                    case Format.Etc2RgbSrgb:
+                        return new FormatInfo(Format.R8G8B8A8Srgb, 1, 1, 4, 4);
+                    case Format.Etc2RgbaUnorm:
+                    case Format.Etc2RgbPtaUnorm:
+                    case Format.Etc2RgbUnorm:
+                        return new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4);
+                }
+            }
+
+            if (!caps.SupportsR4G4Format && info.FormatInfo.Format == Format.R4G4Unorm)
+            {
+                return new FormatInfo(Format.R4G4B4A4Unorm, 1, 1, 2, 4);
+            }
+
             return info.FormatInfo;
         }
 
diff --git a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
index 9e008b33eb..59ca6afdc4 100644
--- a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
+++ b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
@@ -110,6 +110,7 @@ namespace Ryujinx.Graphics.OpenGL
                 supportsBc123Compression: HwCapabilities.SupportsTextureCompressionS3tc,
                 supportsBc45Compression: HwCapabilities.SupportsTextureCompressionRgtc,
                 supportsBc67Compression: true, // Should check BPTC extension, but for some reason NVIDIA is not exposing the extension.
+                supportsEtc2Compression: true,
                 supports3DTextureCompression: false,
                 supportsBgraFormat: false,
                 supportsR4G4Format: false,
diff --git a/Ryujinx.Graphics.Texture/ETC2Decoder.cs b/Ryujinx.Graphics.Texture/ETC2Decoder.cs
new file mode 100644
index 0000000000..21ff4be4b7
--- /dev/null
+++ b/Ryujinx.Graphics.Texture/ETC2Decoder.cs
@@ -0,0 +1,682 @@
+using Ryujinx.Common;
+using System;
+using System.Buffers.Binary;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Texture
+{
+    public static class ETC2Decoder
+    {
+        private const uint AlphaMask = 0xff000000u;
+
+        private const int BlockWidth = 4;
+        private const int BlockHeight = 4;
+
+        private static readonly int[][] _etc1Lut =
+        {
+            new int[] { 2, 8, -2, -8 },
+            new int[] { 5, 17, -5, -17 },
+            new int[] { 9, 29, -9, -29 },
+            new int[] { 13, 42, -13, -42 },
+            new int[] { 18, 60, -18, -60 },
+            new int[] { 24, 80, -24, -80 },
+            new int[] { 33, 106, -33, -106 },
+            new int[] { 47, 183, -47, -183 }
+        };
+
+        private static readonly int[] _etc2Lut =
+        {
+            3, 6, 11, 16, 23, 32, 41, 64
+        };
+
+        private static readonly int[][] _etc2AlphaLut =
+        {
+            new int[] { -3, -6, -9, -15, 2, 5, 8, 14 },
+            new int[] { -3, -7, -10, -13, 2, 6, 9, 12 },
+            new int[] { -2, -5, -8, -13, 1, 4, 7, 12 },
+            new int[] { -2, -4, -6, -13, 1, 3, 5, 12 },
+            new int[] { -3, -6, -8, -12, 2, 5, 7, 11 },
+            new int[] { -3, -7, -9, -11, 2, 6, 8, 10 },
+            new int[] { -4, -7, -8, -11, 3, 6, 7, 10 },
+            new int[] { -3, -5, -8, -11, 2, 4, 7, 10 },
+            new int[] { -2, -6, -8, -10, 1, 5, 7, 9 },
+            new int[] { -2, -5, -8, -10, 1, 4, 7, 9 },
+            new int[] { -2, -4, -8, -10, 1, 3, 7, 9 },
+            new int[] { -2, -5, -7, -10, 1, 4, 6, 9 },
+            new int[] { -3, -4, -7, -10, 2, 3, 6, 9 },
+            new int[] { -1, -2, -3, -10, 0, 1, 2, 9 },
+            new int[] { -4, -6, -8, -9, 3, 5, 7, 8 },
+            new int[] { -3, -5, -7, -9, 2, 4, 6, 8 }
+        };
+
+        public static byte[] DecodeRgb(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
+        {
+            ReadOnlySpan<ulong> dataUlong = MemoryMarshal.Cast<byte, ulong>(data);
+
+            int inputOffset = 0;
+
+            byte[] output = new byte[CalculateOutputSize(width, height, depth, levels, layers)];
+
+            Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
+            Span<uint> tile = stackalloc uint[BlockWidth * BlockHeight];
+
+            int imageBaseOOffs = 0;
+
+            for (int l = 0; l < levels; l++)
+            {
+                int wInBlocks = BitUtils.DivRoundUp(width, BlockWidth);
+                int hInBlocks = BitUtils.DivRoundUp(height, BlockHeight);
+
+                for (int l2 = 0; l2 < layers; l2++)
+                {
+                    for (int z = 0; z < depth; z++)
+                    {
+                        for (int y = 0; y < hInBlocks; y++)
+                        {
+                            int ty = y * BlockHeight;
+                            int bh = Math.Min(BlockHeight, height - ty);
+
+                            for (int x = 0; x < wInBlocks; x++)
+                            {
+                                int tx = x * BlockWidth;
+                                int bw = Math.Min(BlockWidth, width - tx);
+
+                                ulong colorBlock = dataUlong[inputOffset++];
+
+                                DecodeBlock(tile, colorBlock);
+
+                                for (int py = 0; py < bh; py++)
+                                {
+                                    int oOffsBase = imageBaseOOffs + ((ty + py) * width) + tx;
+
+                                    for (int px = 0; px < bw; px++)
+                                    {
+                                        int oOffs = oOffsBase + px;
+
+                                        outputUint[oOffs] = tile[py * BlockWidth + px] | AlphaMask;
+                                    }
+                                }
+                            }
+                        }
+
+                        imageBaseOOffs += width * height;
+                    }
+                }
+
+                width = Math.Max(1, width >> 1);
+                height = Math.Max(1, height >> 1);
+                depth = Math.Max(1, depth >> 1);
+            }
+
+            return output;
+        }
+
+        public static byte[] DecodePta(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
+        {
+            ReadOnlySpan<ulong> dataUlong = MemoryMarshal.Cast<byte, ulong>(data);
+
+            int inputOffset = 0;
+
+            byte[] output = new byte[CalculateOutputSize(width, height, depth, levels, layers)];
+
+            Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
+            Span<uint> tile = stackalloc uint[BlockWidth * BlockHeight];
+
+            int imageBaseOOffs = 0;
+
+            for (int l = 0; l < levels; l++)
+            {
+                int wInBlocks = BitUtils.DivRoundUp(width, BlockWidth);
+                int hInBlocks = BitUtils.DivRoundUp(height, BlockHeight);
+
+                for (int l2 = 0; l2 < layers; l2++)
+                {
+                    for (int z = 0; z < depth; z++)
+                    {
+                        for (int y = 0; y < hInBlocks; y++)
+                        {
+                            int ty = y * BlockHeight;
+                            int bh = Math.Min(BlockHeight, height - ty);
+
+                            for (int x = 0; x < wInBlocks; x++)
+                            {
+                                int tx = x * BlockWidth;
+                                int bw = Math.Min(BlockWidth, width - tx);
+
+                                ulong colorBlock = dataUlong[inputOffset++];
+
+                                DecodeBlockPta(tile, colorBlock);
+
+                                for (int py = 0; py < bh; py++)
+                                {
+                                    int oOffsBase = imageBaseOOffs + ((ty + py) * width) + tx;
+
+                                    tile.Slice(py * BlockWidth, bw).CopyTo(outputUint.Slice(oOffsBase, bw));
+                                }
+                            }
+                        }
+
+                        imageBaseOOffs += width * height;
+                    }
+                }
+
+                width = Math.Max(1, width >> 1);
+                height = Math.Max(1, height >> 1);
+                depth = Math.Max(1, depth >> 1);
+            }
+
+            return output;
+        }
+
+        public static byte[] DecodeRgba(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
+        {
+            ReadOnlySpan<ulong> dataUlong = MemoryMarshal.Cast<byte, ulong>(data);
+
+            int inputOffset = 0;
+
+            byte[] output = new byte[CalculateOutputSize(width, height, depth, levels, layers)];
+
+            Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
+            Span<uint> tile = stackalloc uint[BlockWidth * BlockHeight];
+
+            int imageBaseOOffs = 0;
+
+            for (int l = 0; l < levels; l++)
+            {
+                int wInBlocks = BitUtils.DivRoundUp(width, BlockWidth);
+                int hInBlocks = BitUtils.DivRoundUp(height, BlockHeight);
+
+                for (int l2 = 0; l2 < layers; l2++)
+                {
+                    for (int z = 0; z < depth; z++)
+                    {
+                        for (int y = 0; y < hInBlocks; y++)
+                        {
+                            int ty = y * BlockHeight;
+                            int bh = Math.Min(BlockHeight, height - ty);
+
+                            for (int x = 0; x < wInBlocks; x++)
+                            {
+                                int tx = x * BlockWidth;
+                                int bw = Math.Min(BlockWidth, width - tx);
+
+                                ulong alphaBlock = dataUlong[inputOffset];
+                                ulong colorBlock = dataUlong[inputOffset + 1];
+
+                                inputOffset += 2;
+
+                                DecodeBlock(tile, colorBlock);
+
+                                byte alphaBase = (byte)alphaBlock;
+                                int[] alphaTable = _etc2AlphaLut[(alphaBlock >> 8) & 0xf];
+                                int alphaMultiplier = (int)(alphaBlock >> 12) & 0xf;
+                                ulong alphaIndices = BinaryPrimitives.ReverseEndianness(alphaBlock);
+
+                                if (alphaMultiplier != 0)
+                                {
+                                    for (int py = 0; py < bh; py++)
+                                    {
+                                        int oOffsBase = imageBaseOOffs + ((ty + py) * width) + tx;
+
+                                        for (int px = 0; px < bw; px++)
+                                        {
+                                            int oOffs = oOffsBase + px;
+                                            int alphaIndex = (int)((alphaIndices >> (((px * BlockHeight + py) ^ 0xf) * 3)) & 7);
+
+                                            byte a = Saturate(alphaBase + alphaTable[alphaIndex] * alphaMultiplier);
+
+                                            outputUint[oOffs] = tile[py * BlockWidth + px] | ((uint)a << 24);
+                                        }
+                                    }
+                                }
+                                else
+                                {
+                                    uint a = (uint)alphaBase << 24;
+
+                                    for (int py = 0; py < bh; py++)
+                                    {
+                                        int oOffsBase = imageBaseOOffs + ((ty + py) * width) + tx;
+
+                                        for (int px = 0; px < bw; px++)
+                                        {
+                                            int oOffs = oOffsBase + px;
+
+                                            outputUint[oOffs] = tile[py * BlockWidth + px] | a;
+                                        }
+                                    }
+                                }
+                            }
+                        }
+
+                        imageBaseOOffs += width * height;
+                    }
+                }
+
+                width = Math.Max(1, width >> 1);
+                height = Math.Max(1, height >> 1);
+                depth = Math.Max(1, depth >> 1);
+            }
+
+            return output;
+        }
+
+        private static void DecodeBlock(Span<uint> tile, ulong block)
+        {
+            uint blockLow = (uint)(block >> 0);
+            uint blockHigh = (uint)(block >> 32);
+
+            uint r1, g1, b1;
+            uint r2, g2, b2;
+
+            bool differentialMode = (blockLow & 0x2000000) != 0;
+
+            if (differentialMode)
+            {
+                (r1, g1, b1, r2, g2, b2) = UnpackRgb555DiffEndPoints(blockLow);
+
+                if (r2 > 31)
+                {
+                    DecodeBlock59T(tile, blockLow, blockHigh);
+                }
+                else if (g2 > 31)
+                {
+                    DecodeBlock58H(tile, blockLow, blockHigh);
+                }
+                else if (b2 > 31)
+                {
+                    DecodeBlock57P(tile, block);
+                }
+                else
+                {
+                    r1 |= r1 >> 5;
+                    g1 |= g1 >> 5;
+                    b1 |= b1 >> 5;
+
+                    r2 = (r2 << 3) | (r2 >> 2);
+                    g2 = (g2 << 3) | (g2 >> 2);
+                    b2 = (b2 << 3) | (b2 >> 2);
+
+                    DecodeBlockETC1(tile, blockLow, blockHigh, r1, g1, b1, r2, g2, b2);
+                }
+            }
+            else
+            {
+                r1 = (blockLow & 0x0000f0) >> 0;
+                g1 = (blockLow & 0x00f000) >> 8;
+                b1 = (blockLow & 0xf00000) >> 16;
+
+                r2 = (blockLow & 0x00000f) << 4;
+                g2 = (blockLow & 0x000f00) >> 4;
+                b2 = (blockLow & 0x0f0000) >> 12;
+
+                r1 |= r1 >> 4;
+                g1 |= g1 >> 4;
+                b1 |= b1 >> 4;
+
+                r2 |= r2 >> 4;
+                g2 |= g2 >> 4;
+                b2 |= b2 >> 4;
+
+                DecodeBlockETC1(tile, blockLow, blockHigh, r1, g1, b1, r2, g2, b2);
+            }
+        }
+
+        private static void DecodeBlockPta(Span<uint> tile, ulong block)
+        {
+            uint blockLow = (uint)(block >> 0);
+            uint blockHigh = (uint)(block >> 32);
+
+            (uint r1, uint g1, uint b1, uint r2, uint g2, uint b2) = UnpackRgb555DiffEndPoints(blockLow);
+
+            bool fullyOpaque = (blockLow & 0x2000000) != 0;
+
+            if (fullyOpaque)
+            {
+                if (r2 > 31)
+                {
+                    DecodeBlock59T(tile, blockLow, blockHigh);
+                }
+                else if (g2 > 31)
+                {
+                    DecodeBlock58H(tile, blockLow, blockHigh);
+                }
+                else if (b2 > 31)
+                {
+                    DecodeBlock57P(tile, block);
+                }
+                else
+                {
+                    r1 |= r1 >> 5;
+                    g1 |= g1 >> 5;
+                    b1 |= b1 >> 5;
+
+                    r2 = (r2 << 3) | (r2 >> 2);
+                    g2 = (g2 << 3) | (g2 >> 2);
+                    b2 = (b2 << 3) | (b2 >> 2);
+
+                    DecodeBlockETC1(tile, blockLow, blockHigh, r1, g1, b1, r2, g2, b2);
+                }
+
+                for (int i = 0; i < tile.Length; i++)
+                {
+                    tile[i] |= AlphaMask;
+                }
+            }
+            else
+            {
+                if (r2 > 31)
+                {
+                    DecodeBlock59T(tile, blockLow, blockHigh, AlphaMask);
+                }
+                else if (g2 > 31)
+                {
+                    DecodeBlock58H(tile, blockLow, blockHigh, AlphaMask);
+                }
+                else if (b2 > 31)
+                {
+                    DecodeBlock57P(tile, block);
+
+                    for (int i = 0; i < tile.Length; i++)
+                    {
+                        tile[i] |= AlphaMask;
+                    }
+                }
+                else
+                {
+                    r1 |= r1 >> 5;
+                    g1 |= g1 >> 5;
+                    b1 |= b1 >> 5;
+
+                    r2 = (r2 << 3) | (r2 >> 2);
+                    g2 = (g2 << 3) | (g2 >> 2);
+                    b2 = (b2 << 3) | (b2 >> 2);
+
+                    DecodeBlockETC1(tile, blockLow, blockHigh, r1, g1, b1, r2, g2, b2, AlphaMask);
+                }
+            }
+        }
+
+        private static (uint, uint, uint, uint, uint, uint) UnpackRgb555DiffEndPoints(uint blockLow)
+        {
+            uint r1 = (blockLow & 0x0000f8) >> 0;
+            uint g1 = (blockLow & 0x00f800) >> 8;
+            uint b1 = (blockLow & 0xf80000) >> 16;
+
+            uint r2 = (uint)((sbyte)(r1 >> 3) + ((sbyte)((blockLow & 0x000007) << 5) >> 5));
+            uint g2 = (uint)((sbyte)(g1 >> 3) + ((sbyte)((blockLow & 0x000700) >> 3) >> 5));
+            uint b2 = (uint)((sbyte)(b1 >> 3) + ((sbyte)((blockLow & 0x070000) >> 11) >> 5));
+
+            return (r1, g1, b1, r2, g2, b2);
+        }
+
+        private static void DecodeBlock59T(Span<uint> tile, uint blockLow, uint blockHigh, uint alphaMask = 0)
+        {
+            uint r1 = (blockLow & 3) | ((blockLow >> 1) & 0xc);
+            uint g1 = (blockLow >> 12) & 0xf;
+            uint b1 = (blockLow >> 8) & 0xf;
+
+            uint r2 = (blockLow >> 20) & 0xf;
+            uint g2 = (blockLow >> 16) & 0xf;
+            uint b2 = (blockLow >> 28) & 0xf;
+
+            r1 |= r1 << 4;
+            g1 |= g1 << 4;
+            b1 |= b1 << 4;
+
+            r2 |= r2 << 4;
+            g2 |= g2 << 4;
+            b2 |= b2 << 4;
+
+            int dist = _etc2Lut[((blockLow >> 24) & 1) | ((blockLow >> 25) & 6)];
+
+            Span<uint> palette = stackalloc uint[4];
+
+            palette[0] = Pack(r1, g1, b1);
+            palette[1] = Pack(r2, g2, b2, dist);
+            palette[2] = Pack(r2, g2, b2);
+            palette[3] = Pack(r2, g2, b2, -dist);
+
+            blockHigh = BinaryPrimitives.ReverseEndianness(blockHigh);
+
+            for (int y = 0; y < BlockHeight; y++)
+            {
+                for (int x = 0; x < BlockWidth; x++)
+                {
+                    int offset = (y * 4) + x;
+                    int index = (x * 4) + y;
+
+                    int paletteIndex = (int)((blockHigh >> index) & 1) | (int)((blockHigh >> (index + 15)) & 2);
+
+                    tile[offset] = palette[paletteIndex];
+
+                    if (alphaMask != 0)
+                    {
+                        if (paletteIndex == 2)
+                        {
+                            tile[offset] = 0;
+                        }
+                        else
+                        {
+                            tile[offset] |= alphaMask;
+                        }
+                    }
+                }
+            }
+        }
+
+        private static void DecodeBlock58H(Span<uint> tile, uint blockLow, uint blockHigh, uint alphaMask = 0)
+        {
+            uint r1 = (blockLow >> 3) & 0xf;
+            uint g1 = ((blockLow << 1) & 0xe) | ((blockLow >> 12) & 1);
+            uint b1 = ((blockLow >> 23) & 1) | ((blockLow >> 7) & 6) | ((blockLow >> 8) & 8);
+
+            uint r2 = (blockLow >> 19) & 0xf;
+            uint g2 = ((blockLow >> 31) & 1) | ((blockLow >> 15) & 0xe);
+            uint b2 = (blockLow >> 27) & 0xf;
+
+            uint rgb1 = Pack4Be(r1, g1, b1);
+            uint rgb2 = Pack4Be(r2, g2, b2);
+
+            r1 |= r1 << 4;
+            g1 |= g1 << 4;
+            b1 |= b1 << 4;
+
+            r2 |= r2 << 4;
+            g2 |= g2 << 4;
+            b2 |= b2 << 4;
+
+            int dist = _etc2Lut[(rgb1 >= rgb2 ? 1u : 0u) | ((blockLow >> 23) & 2) | ((blockLow >> 24) & 4)];
+
+            Span<uint> palette = stackalloc uint[4];
+
+            palette[0] = Pack(r1, g1, b1, dist);
+            palette[1] = Pack(r1, g1, b1, -dist);
+            palette[2] = Pack(r2, g2, b2, dist);
+            palette[3] = Pack(r2, g2, b2, -dist);
+
+            blockHigh = BinaryPrimitives.ReverseEndianness(blockHigh);
+
+            for (int y = 0; y < BlockHeight; y++)
+            {
+                for (int x = 0; x < BlockWidth; x++)
+                {
+                    int offset = (y * 4) + x;
+                    int index = (x * 4) + y;
+
+                    int paletteIndex = (int)((blockHigh >> index) & 1) | (int)((blockHigh >> (index + 15)) & 2);
+
+                    tile[offset] = palette[paletteIndex];
+
+                    if (alphaMask != 0)
+                    {
+                        if (paletteIndex == 2)
+                        {
+                            tile[offset] = 0;
+                        }
+                        else
+                        {
+                            tile[offset] |= alphaMask;
+                        }
+                    }
+                }
+            }
+        }
+
+        private static void DecodeBlock57P(Span<uint> tile, ulong block)
+        {
+            int r0 = (int)((block >> 1) & 0x3f);
+            int g0 = (int)(((block >> 9) & 0x3f) | ((block & 1) << 6));
+            int b0 = (int)(((block >> 31) & 1) | ((block >> 15) & 6) | ((block >> 16) & 0x18) | ((block >> 3) & 0x20));
+
+            int rh = (int)(((block >> 24) & 1) | ((block >> 25) & 0x3e));
+            int gh = (int)((block >> 33) & 0x7f);
+            int bh = (int)(((block >> 43) & 0x1f) | ((block >> 27) & 0x20));
+
+            int rv = (int)(((block >> 53) & 7) | ((block >> 37) & 0x38));
+            int gv = (int)(((block >> 62) & 3) | ((block >> 46) & 0x7c));
+            int bv = (int)((block >> 56) & 0x3f);
+
+            r0 = (r0 << 2) | (r0 >> 4);
+            g0 = (g0 << 1) | (g0 >> 6);
+            b0 = (b0 << 2) | (b0 >> 4);
+
+            rh = (rh << 2) | (rh >> 4);
+            gh = (gh << 1) | (gh >> 6);
+            bh = (bh << 2) | (bh >> 4);
+
+            rv = (rv << 2) | (rv >> 4);
+            gv = (gv << 1) | (gv >> 6);
+            bv = (bv << 2) | (bv >> 4);
+
+            for (int y = 0; y < BlockHeight; y++)
+            {
+                for (int x = 0; x < BlockWidth; x++)
+                {
+                    int offset = y * BlockWidth + x;
+
+                    byte r = Saturate(((x * (rh - r0)) + (y * (rv - r0)) + (r0 * 4) + 2) >> 2);
+                    byte g = Saturate(((x * (gh - g0)) + (y * (gv - g0)) + (g0 * 4) + 2) >> 2);
+                    byte b = Saturate(((x * (bh - b0)) + (y * (bv - b0)) + (b0 * 4) + 2) >> 2);
+
+                    tile[offset] = Pack(r, g, b);
+                }
+            }
+        }
+
+        private static void DecodeBlockETC1(
+            Span<uint> tile,
+            uint blockLow,
+            uint blockHigh,
+            uint r1,
+            uint g1,
+            uint b1,
+            uint r2,
+            uint g2,
+            uint b2,
+            uint alphaMask = 0)
+        {
+            int[] table1 = _etc1Lut[(blockLow >> 29) & 7];
+            int[] table2 = _etc1Lut[(blockLow >> 26) & 7];
+
+            bool flip = (blockLow & 0x1000000) != 0;
+
+            if (!flip)
+            {
+                for (int y = 0; y < BlockHeight; y++)
+                {
+                    for (int x = 0; x < BlockWidth / 2; x++)
+                    {
+                        uint color1 = CalculatePixel(r1, g1, b1, x + 0, y, blockHigh, table1, alphaMask);
+                        uint color2 = CalculatePixel(r2, g2, b2, x + 2, y, blockHigh, table2, alphaMask);
+
+                        int offset1 = y * BlockWidth + x;
+                        int offset2 = y * BlockWidth + x + 2;
+
+                        tile[offset1] = color1;
+                        tile[offset2] = color2;
+                    }
+                }
+            }
+            else
+            {
+                for (int y = 0; y < BlockHeight / 2; y++)
+                {
+                    for (int x = 0; x < BlockWidth; x++)
+                    {
+                        uint color1 = CalculatePixel(r1, g1, b1, x, y + 0, blockHigh, table1, alphaMask);
+                        uint color2 = CalculatePixel(r2, g2, b2, x, y + 2, blockHigh, table2, alphaMask);
+
+                        int offset1 = (y * BlockWidth) + x;
+                        int offset2 = ((y + 2) * BlockWidth) + x;
+
+                        tile[offset1] = color1;
+                        tile[offset2] = color2;
+                    }
+                }
+            }
+        }
+
+        private static uint CalculatePixel(uint r, uint g, uint b, int x, int y, uint block, int[] table, uint alphaMask)
+        {
+            int index = x * BlockHeight + y;
+            uint msb = block << 1;
+            uint tableIndex = index < 8
+                ? ((block >> (index + 24)) & 1) + ((msb >> (index + 8)) & 2)
+                : ((block >> (index + 8)) & 1) + ((msb >> (index - 8)) & 2);
+
+            if (alphaMask != 0)
+            {
+                if (tableIndex == 0)
+                {
+                    return Pack(r, g, b) | alphaMask;
+                }
+                else if (tableIndex == 2)
+                {
+                    return 0;
+                }
+                else
+                {
+                    return Pack(r, g, b, table[tableIndex]) | alphaMask;
+                }
+            }
+
+            return Pack(r, g, b, table[tableIndex]);
+        }
+
+        private static uint Pack(uint r, uint g, uint b, int offset)
+        {
+            r = Saturate((int)(r + offset));
+            g = Saturate((int)(g + offset));
+            b = Saturate((int)(b + offset));
+
+            return Pack(r, g, b);
+        }
+
+        private static uint Pack(uint r, uint g, uint b)
+        {
+            return r | (g << 8) | (b << 16);
+        }
+
+        private static uint Pack4Be(uint r, uint g, uint b)
+        {
+            return (r << 8) | (g << 4) | b;
+        }
+
+        private static byte Saturate(int value)
+        {
+            return value > byte.MaxValue ? byte.MaxValue : value < byte.MinValue ? byte.MinValue : (byte)value;
+        }
+
+        private static int CalculateOutputSize(int width, int height, int depth, int levels, int layers)
+        {
+            int size = 0;
+
+            for (int l = 0; l < levels; l++)
+            {
+                size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4;
+            }
+
+            return size;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
index fa9d4cb0c0..a32400f565 100644
--- a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
+++ b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
@@ -388,6 +388,13 @@ namespace Ryujinx.Graphics.Vulkan
                 GAL.Format.Bc7Srgb,
                 GAL.Format.Bc7Unorm);
 
+            bool supportsEtc2CompressionFormat = FormatCapabilities.OptimalFormatsSupport(compressedFormatFeatureFlags,
+                GAL.Format.Etc2RgbaSrgb,
+                GAL.Format.Etc2RgbaUnorm,
+                GAL.Format.Etc2RgbPtaSrgb,
+                GAL.Format.Etc2RgbPtaUnorm,
+                GAL.Format.Etc2RgbSrgb,
+                GAL.Format.Etc2RgbUnorm);
 
             PhysicalDeviceVulkan12Features featuresVk12 = new PhysicalDeviceVulkan12Features()
             {
@@ -414,6 +421,7 @@ namespace Ryujinx.Graphics.Vulkan
                 supportsBc123Compression: supportsBc123CompressionFormat,
                 supportsBc45Compression: supportsBc45CompressionFormat,
                 supportsBc67Compression: supportsBc67CompressionFormat,
+                supportsEtc2Compression: supportsEtc2CompressionFormat,
                 supports3DTextureCompression: true,
                 supportsBgraFormat: true,
                 supportsR4G4Format: false,