From baecc18d8c5365af0dddb231bc8c0a9c03850bf6 Mon Sep 17 00:00:00 2001
From: SachinVin <26602104+SachinVin@users.noreply.github.com>
Date: Sat, 10 Sep 2022 15:35:13 +0530
Subject: [PATCH] Partially Revert "renderer_opengl: Remove amd hacks and
 legacy paths" (#6122)

This reverts commit 07a69b7c7b2f469cb946c4caec42981e7a4400ad.
---
 .../renderer_opengl/gl_rasterizer.cpp         | 21 ++++++++++++-------
 .../renderer_opengl/gl_rasterizer.h           |  2 ++
 .../renderer_opengl/gl_shader_manager.cpp     | 19 +++++++++++++----
 .../renderer_opengl/gl_shader_manager.h       |  2 +-
 .../renderer_opengl/gl_stream_buffer.cpp      | 11 +++++++++-
 .../renderer_opengl/gl_stream_buffer.h        |  3 ++-
 6 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a18f1f6f7..4a184d38f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -26,6 +26,10 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
 MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
 
+static bool IsVendorAmd() {
+    const std::string_view gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
+    return gpu_vendor == "ATI Technologies Inc." || gpu_vendor == "Advanced Micro Devices, Inc.";
+}
 #ifdef __APPLE__
 static bool IsVendorIntel() {
     std::string gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
@@ -34,11 +38,11 @@ static bool IsVendorIntel() {
 #endif
 
 RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window)
-    : vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE),
-      uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE),
-      index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE),
-      texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE),
-      texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE) {
+    : is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd),
+      uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false),
+      index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false),
+      texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false),
+      texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) {
 
     // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
     state.clip_distance[0] = true;
@@ -144,12 +148,13 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window)
 
 #ifdef __APPLE__
     if (IsVendorIntel()) {
-        shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, false);
+        shader_program_manager = std::make_unique<ShaderProgramManager>(
+            emu_window, VideoCore::g_separable_shader_enabled, is_amd);
     } else {
-        shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, false);
+        shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, true, is_amd);
     }
 #else
-    shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, !GLES);
+    shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, !GLES, is_amd);
 #endif
 
     glEnable(GL_BLEND);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f4e4eb8d6..98008ad99 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -247,6 +247,8 @@ private:
     /// Setup geometry shader for AccelerateDrawBatch
     bool SetupGeometryShader();
 
+    bool is_amd;
+
     OpenGLState state;
     GLuint default_texture;
 
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index fecc48583..bd4399feb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -327,8 +327,8 @@ using FragmentShaders = ShaderCache<PicaFSConfig, &GenerateFragmentShader, GL_FR
 
 class ShaderProgramManager::Impl {
 public:
-    explicit Impl(bool separable)
-        : separable(separable), programmable_vertex_shaders(separable),
+    explicit Impl(bool separable, bool is_amd)
+        : is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable),
           trivial_vertex_shader(separable), fixed_geometry_shaders(separable),
           fragment_shaders(separable), disk_cache(separable) {
         if (separable)
@@ -361,6 +361,7 @@ public:
         }
     };
 
+    bool is_amd;
     bool separable;
 
     ShaderTuple current;
@@ -376,8 +377,9 @@ public:
     ShaderDiskCache disk_cache;
 };
 
-ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable)
-    : impl(std::make_unique<Impl>(separable)), emu_window{emu_window_} {}
+ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable,
+                                           bool is_amd)
+    : impl(std::make_unique<Impl>(separable, is_amd)), emu_window{emu_window_} {}
 
 ShaderProgramManager::~ShaderProgramManager() = default;
 
@@ -439,6 +441,15 @@ void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {
 
 void ShaderProgramManager::ApplyTo(OpenGLState& state) {
     if (impl->separable) {
+        if (impl->is_amd) {
+            // Without this reseting, AMD sometimes freezes when one stage is changed but not
+            // for the others. On the other hand, including this reset seems to introduce memory
+            // leak in Intel Graphics.
+            glUseProgramStages(
+                impl->pipeline.handle,
+                GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0);
+        }
+
         glUseProgramStages(impl->pipeline.handle, GL_VERTEX_SHADER_BIT, impl->current.vs);
         glUseProgramStages(impl->pipeline.handle, GL_GEOMETRY_SHADER_BIT, impl->current.gs);
         glUseProgramStages(impl->pipeline.handle, GL_FRAGMENT_SHADER_BIT, impl->current.fs);
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index e487461ad..66883e7e6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -108,7 +108,7 @@ class OpenGLState;
 /// A class that manage different shader stages and configures them with given config data.
 class ShaderProgramManager {
 public:
-    ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable);
+    ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, bool is_amd);
     ~ShaderProgramManager();
 
     void LoadDiskCache(const std::atomic_bool& stop_loading,
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 7985906c5..1ff5ba62d 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -12,12 +12,21 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
 
 namespace OpenGL {
 
-OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent)
+OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd,
+                                 bool prefer_coherent)
     : gl_target(target), buffer_size(size) {
     gl_buffer.Create();
     glBindBuffer(gl_target, gl_buffer.handle);
 
     GLsizeiptr allocate_size = size;
+    if (array_buffer_for_amd) {
+        // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
+        // read position is near the end and is an out-of-bound access to the vertex buffer. This is
+        // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
+        // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
+        // crash.
+        allocate_size *= 2;
+    }
 
     if (GLAD_GL_ARB_buffer_storage) {
         persistent = true;
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index da978735d..1a2853198 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -10,7 +10,8 @@ namespace OpenGL {
 
 class OGLStreamBuffer : private NonCopyable {
 public:
-    explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false);
+    explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd,
+                             bool prefer_coherent = false);
     ~OGLStreamBuffer();
 
     GLuint GetHandle() const;