From e3f4233cefff611e03a2031c6194a118d946a5d9 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sat, 25 Jul 2015 20:13:11 -0500
Subject: [PATCH] Initial implementation of fragment shader generation with
 caching.

---
 src/video_core/pica.h                         |   4 +
 .../renderer_opengl/gl_rasterizer.cpp         | 373 +++++++-----------
 .../renderer_opengl/gl_rasterizer.h           | 110 ++++--
 .../renderer_opengl/gl_shader_util.cpp        | 349 ++++++++++++++++
 .../renderer_opengl/gl_shader_util.h          |   6 +
 src/video_core/renderer_opengl/gl_shaders.h   |   8 +-
 src/video_core/renderer_opengl/gl_state.h     |   1 +
 7 files changed, 579 insertions(+), 272 deletions(-)

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index ff81b409d..18fdc8c85 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -317,6 +317,7 @@ struct Regs {
         };
 
         union {
+            u32 source_raw;
             BitField< 0, 4, Source> color_source1;
             BitField< 4, 4, Source> color_source2;
             BitField< 8, 4, Source> color_source3;
@@ -326,6 +327,7 @@ struct Regs {
         };
 
         union {
+            u32 modifier_raw;
             BitField< 0, 4, ColorModifier> color_modifier1;
             BitField< 4, 4, ColorModifier> color_modifier2;
             BitField< 8, 4, ColorModifier> color_modifier3;
@@ -335,6 +337,7 @@ struct Regs {
         };
 
         union {
+            u32 op_raw;
             BitField< 0, 4, Operation> color_op;
             BitField<16, 4, Operation> alpha_op;
         };
@@ -348,6 +351,7 @@ struct Regs {
         };
 
         union {
+            u32 scale_raw;
             BitField< 0, 2, u32> color_scale;
             BitField<16, 2, u32> alpha_scale;
         };
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a613fe136..45329d561 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -8,6 +8,7 @@
 #include <glad/glad.h>
 
 #include "common/color.h"
+#include "common/file_util.h"
 #include "common/math_util.h"
 #include "common/microprofile.h"
 #include "common/profiler.h"
@@ -38,36 +39,6 @@ RasterizerOpenGL::RasterizerOpenGL() : last_fb_color_addr(0), last_fb_depth_addr
 RasterizerOpenGL::~RasterizerOpenGL() { }
 
 void RasterizerOpenGL::InitObjects() {
-    // Create the hardware shader program and get attrib/uniform locations
-    shader.Create(GLShaders::g_vertex_shader_hw, GLShaders::g_fragment_shader_hw);
-    attrib_position = glGetAttribLocation(shader.handle, "vert_position");
-    attrib_color = glGetAttribLocation(shader.handle, "vert_color");
-    attrib_texcoords = glGetAttribLocation(shader.handle, "vert_texcoords");
-
-    uniform_alphatest_enabled = glGetUniformLocation(shader.handle, "alphatest_enabled");
-    uniform_alphatest_func = glGetUniformLocation(shader.handle, "alphatest_func");
-    uniform_alphatest_ref = glGetUniformLocation(shader.handle, "alphatest_ref");
-
-    uniform_tex = glGetUniformLocation(shader.handle, "tex");
-
-    uniform_tev_combiner_buffer_color = glGetUniformLocation(shader.handle, "tev_combiner_buffer_color");
-
-    const auto tev_stages = Pica::g_state.regs.GetTevStages();
-    for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
-        auto& uniform_tev_cfg = uniform_tev_cfgs[tev_stage_index];
-
-        std::string tev_ref_str = "tev_cfgs[" + std::to_string(tev_stage_index) + "]";
-        uniform_tev_cfg.enabled = glGetUniformLocation(shader.handle, (tev_ref_str + ".enabled").c_str());
-        uniform_tev_cfg.color_sources = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_sources").c_str());
-        uniform_tev_cfg.alpha_sources = glGetUniformLocation(shader.handle, (tev_ref_str + ".alpha_sources").c_str());
-        uniform_tev_cfg.color_modifiers = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_modifiers").c_str());
-        uniform_tev_cfg.alpha_modifiers = glGetUniformLocation(shader.handle, (tev_ref_str + ".alpha_modifiers").c_str());
-        uniform_tev_cfg.color_alpha_op = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_alpha_op").c_str());
-        uniform_tev_cfg.color_alpha_multiplier = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_alpha_multiplier").c_str());
-        uniform_tev_cfg.const_color = glGetUniformLocation(shader.handle, (tev_ref_str + ".const_color").c_str());
-        uniform_tev_cfg.updates_combiner_buffer_color_alpha = glGetUniformLocation(shader.handle, (tev_ref_str + ".updates_combiner_buffer_color_alpha").c_str());
-    }
-
     // Create sampler objects
     for (size_t i = 0; i < texture_samplers.size(); ++i) {
         texture_samplers[i].Create();
@@ -78,29 +49,25 @@ void RasterizerOpenGL::InitObjects() {
     vertex_buffer.Create();
     vertex_array.Create();
 
-    // Update OpenGL state
     state.draw.vertex_array = vertex_array.handle;
     state.draw.vertex_buffer = vertex_buffer.handle;
-    state.draw.shader_program = shader.handle;
-
     state.Apply();
 
-    // Set the texture samplers to correspond to different texture units
-    glUniform1i(uniform_tex, 0);
-    glUniform1i(uniform_tex + 1, 1);
-    glUniform1i(uniform_tex + 2, 2);
-
     // Set vertex attributes
-    glVertexAttribPointer(attrib_position, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
-    glVertexAttribPointer(attrib_color, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, color));
-    glVertexAttribPointer(attrib_texcoords, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0));
-    glVertexAttribPointer(attrib_texcoords + 1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1));
-    glVertexAttribPointer(attrib_texcoords + 2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2));
-    glEnableVertexAttribArray(attrib_position);
-    glEnableVertexAttribArray(attrib_color);
-    glEnableVertexAttribArray(attrib_texcoords);
-    glEnableVertexAttribArray(attrib_texcoords + 1);
-    glEnableVertexAttribArray(attrib_texcoords + 2);
+    glVertexAttribPointer(ShaderUtil::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
+    glEnableVertexAttribArray(ShaderUtil::ATTRIBUTE_POSITION);
+
+    glVertexAttribPointer(ShaderUtil::ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, color));
+    glEnableVertexAttribArray(ShaderUtil::ATTRIBUTE_COLOR);
+
+    glVertexAttribPointer(ShaderUtil::ATTRIBUTE_TEXCOORDS, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0));
+    glVertexAttribPointer(ShaderUtil::ATTRIBUTE_TEXCOORDS + 1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1));
+    glVertexAttribPointer(ShaderUtil::ATTRIBUTE_TEXCOORDS + 2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2));
+    glEnableVertexAttribArray(ShaderUtil::ATTRIBUTE_TEXCOORDS);
+    glEnableVertexAttribArray(ShaderUtil::ATTRIBUTE_TEXCOORDS + 1);
+    glEnableVertexAttribArray(ShaderUtil::ATTRIBUTE_TEXCOORDS + 2);
+
+    RegenerateShaders();
 
     // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
     fb_color_texture.texture.Create();
@@ -156,55 +123,11 @@ void RasterizerOpenGL::Reset() {
     SyncBlendEnabled();
     SyncBlendFuncs();
     SyncBlendColor();
-    SyncAlphaTest();
     SyncLogicOp();
     SyncStencilTest();
     SyncDepthTest();
 
-    // TEV stage 0
-    SyncTevSources(0, regs.tev_stage0);
-    SyncTevModifiers(0, regs.tev_stage0);
-    SyncTevOps(0, regs.tev_stage0);
-    SyncTevColor(0, regs.tev_stage0);
-    SyncTevMultipliers(0, regs.tev_stage0);
-
-    // TEV stage 1
-    SyncTevSources(1, regs.tev_stage1);
-    SyncTevModifiers(1, regs.tev_stage1);
-    SyncTevOps(1, regs.tev_stage1);
-    SyncTevColor(1, regs.tev_stage1);
-    SyncTevMultipliers(1, regs.tev_stage1);
-
-    // TEV stage 2
-    SyncTevSources(2, regs.tev_stage2);
-    SyncTevModifiers(2, regs.tev_stage2);
-    SyncTevOps(2, regs.tev_stage2);
-    SyncTevColor(2, regs.tev_stage2);
-    SyncTevMultipliers(2, regs.tev_stage2);
-
-    // TEV stage 3
-    SyncTevSources(3, regs.tev_stage3);
-    SyncTevModifiers(3, regs.tev_stage3);
-    SyncTevOps(3, regs.tev_stage3);
-    SyncTevColor(3, regs.tev_stage3);
-    SyncTevMultipliers(3, regs.tev_stage3);
-
-    // TEV stage 4
-    SyncTevSources(4, regs.tev_stage4);
-    SyncTevModifiers(4, regs.tev_stage4);
-    SyncTevOps(4, regs.tev_stage4);
-    SyncTevColor(4, regs.tev_stage4);
-    SyncTevMultipliers(4, regs.tev_stage4);
-
-    // TEV stage 5
-    SyncTevSources(5, regs.tev_stage5);
-    SyncTevModifiers(5, regs.tev_stage5);
-    SyncTevOps(5, regs.tev_stage5);
-    SyncTevColor(5, regs.tev_stage5);
-    SyncTevMultipliers(5, regs.tev_stage5);
-
-    SyncCombinerColor();
-    SyncCombinerWriteFlags();
+    RegenerateShaders();
 
     res_cache.FullFlush();
 }
@@ -217,10 +140,88 @@ void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0,
     vertex_batch.emplace_back(v2);
 }
 
+namespace ShaderCache {
+extern std::string GenerateFragmentShader(const ShaderCacheKey& config); // NOTE(review): forward declaration; presumably defined in gl_shader_util.cpp's ShaderCache namespace - confirm, and consider moving it into a header
+}
+
+/// Binds the fragment shader matching the current TEV / alpha-test registers (generating and caching it on a miss), then re-syncs its uniforms.
+void RasterizerOpenGL::RegenerateShaders() {
+    const auto& regs = Pica::g_state.regs;
+    // Cache key: only state baked into the generated source. TEV constant colors are zeroed because they are uploaded as uniforms below.
+    ShaderCacheKey config;
+    config.alpha_test_func = regs.output_merger.alpha_test.enable ?
+            regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always;
+    config.tev_stages = regs.GetTevStages();
+    for (auto& tev : config.tev_stages) {
+        tev.const_r = 0;
+        tev.const_g = 0;
+        tev.const_b = 0;
+        tev.const_a = 0;
+    }
+    config.combiner_buffer_input =
+            regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
+            regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
+
+    auto cached_shader = shader_cache.find(config);
+    if (cached_shader != shader_cache.end()) {
+        current_shader = &cached_shader->second;
+        state.draw.shader_program = current_shader->shader.handle;
+        state.Apply();
+    } else {
+        LOG_CRITICAL(Render_OpenGL, "Creating new shader: %08X", static_cast<u32>(hash(config)));
+
+        TEVShader shader;
+
+        std::string fragShader = ShaderCache::GenerateFragmentShader(config);
+        shader.shader.Create(GLShaders::g_vertex_shader_hw, fragShader.c_str());
+
+        shader.uniform_alphatest_ref = glGetUniformLocation(shader.shader.handle, "alphatest_ref");
+        shader.uniform_tex = glGetUniformLocation(shader.shader.handle, "tex");
+        shader.uniform_tev_combiner_buffer_color = glGetUniformLocation(shader.shader.handle, "tev_combiner_buffer_color");
+        shader.uniform_tev_const_colors = glGetUniformLocation(shader.shader.handle, "const_color");
+
+        current_shader = &shader_cache.emplace(config, std::move(shader)).first->second;
+
+        state.draw.shader_program = current_shader->shader.handle;
+        state.Apply();
+
+        // Set the texture samplers to correspond to different texture units (read via current_shader: `shader` has been moved from)
+        if (current_shader->uniform_tex != -1) {
+            glUniform1i(current_shader->uniform_tex, 0);
+            glUniform1i(current_shader->uniform_tex + 1, 1);
+            glUniform1i(current_shader->uniform_tex + 2, 2);
+        }
+    }
+
+    // Sync alpha reference
+    if (current_shader->uniform_alphatest_ref != -1)
+        glUniform1f(current_shader->uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f);
+
+    // Sync combiner buffer color
+    if (current_shader->uniform_tev_combiner_buffer_color != -1) {
+        auto combiner_color = PicaToGL::ColorRGBA8(Pica::g_state.regs.tev_combiner_buffer_color.raw);
+        glUniform4fv(current_shader->uniform_tev_combiner_buffer_color, 1, combiner_color.data());
+    }
+
+    // Sync TEV const colors
+    if (current_shader->uniform_tev_const_colors != -1) {
+        const auto& tev_stages = regs.GetTevStages();
+        for (unsigned tev_index = 0; tev_index < tev_stages.size(); ++tev_index) {
+            auto const_color = PicaToGL::ColorRGBA8(tev_stages[tev_index].const_color);
+            glUniform4fv(current_shader->uniform_tev_const_colors + tev_index, 1, const_color.data());
+        }
+    }
+}
+
 void RasterizerOpenGL::DrawTriangles() {
     SyncFramebuffer();
     SyncDrawState();
 
+    if (state.draw.shader_dirty) {
+        RegenerateShaders();
+        state.draw.shader_dirty = false;
+    }
+
     glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW);
     glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
 
@@ -272,6 +273,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
     // Alpha test
     case PICA_REG_INDEX(output_merger.alpha_test):
         SyncAlphaTest();
+        state.draw.shader_dirty = true;
         break;
 
     // Stencil test
@@ -290,117 +292,57 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
         SyncLogicOp();
         break;
 
-    // TEV stage 0
+    // TEV stages
     case PICA_REG_INDEX(tev_stage0.color_source1):
-        SyncTevSources(0, regs.tev_stage0);
-        break;
     case PICA_REG_INDEX(tev_stage0.color_modifier1):
-        SyncTevModifiers(0, regs.tev_stage0);
-        break;
     case PICA_REG_INDEX(tev_stage0.color_op):
-        SyncTevOps(0, regs.tev_stage0);
+    case PICA_REG_INDEX(tev_stage0.color_scale):
+    case PICA_REG_INDEX(tev_stage1.color_source1):
+    case PICA_REG_INDEX(tev_stage1.color_modifier1):
+    case PICA_REG_INDEX(tev_stage1.color_op):
+    case PICA_REG_INDEX(tev_stage1.color_scale):
+    case PICA_REG_INDEX(tev_stage2.color_source1):
+    case PICA_REG_INDEX(tev_stage2.color_modifier1):
+    case PICA_REG_INDEX(tev_stage2.color_op):
+    case PICA_REG_INDEX(tev_stage2.color_scale):
+    case PICA_REG_INDEX(tev_stage3.color_source1):
+    case PICA_REG_INDEX(tev_stage3.color_modifier1):
+    case PICA_REG_INDEX(tev_stage3.color_op):
+    case PICA_REG_INDEX(tev_stage3.color_scale):
+    case PICA_REG_INDEX(tev_stage4.color_source1):
+    case PICA_REG_INDEX(tev_stage4.color_modifier1):
+    case PICA_REG_INDEX(tev_stage4.color_op):
+    case PICA_REG_INDEX(tev_stage4.color_scale):
+    case PICA_REG_INDEX(tev_stage5.color_source1):
+    case PICA_REG_INDEX(tev_stage5.color_modifier1):
+    case PICA_REG_INDEX(tev_stage5.color_op):
+    case PICA_REG_INDEX(tev_stage5.color_scale):
+    case PICA_REG_INDEX(tev_combiner_buffer_input):
+        state.draw.shader_dirty = true;
         break;
     case PICA_REG_INDEX(tev_stage0.const_r):
-        SyncTevColor(0, regs.tev_stage0);
-        break;
-    case PICA_REG_INDEX(tev_stage0.color_scale):
-        SyncTevMultipliers(0, regs.tev_stage0);
-        break;
-
-    // TEV stage 1
-    case PICA_REG_INDEX(tev_stage1.color_source1):
-        SyncTevSources(1, regs.tev_stage1);
-        break;
-    case PICA_REG_INDEX(tev_stage1.color_modifier1):
-        SyncTevModifiers(1, regs.tev_stage1);
-        break;
-    case PICA_REG_INDEX(tev_stage1.color_op):
-        SyncTevOps(1, regs.tev_stage1);
+        SyncTevConstColor(0, regs.tev_stage0);
         break;
     case PICA_REG_INDEX(tev_stage1.const_r):
-        SyncTevColor(1, regs.tev_stage1);
-        break;
-    case PICA_REG_INDEX(tev_stage1.color_scale):
-        SyncTevMultipliers(1, regs.tev_stage1);
-        break;
-
-    // TEV stage 2
-    case PICA_REG_INDEX(tev_stage2.color_source1):
-        SyncTevSources(2, regs.tev_stage2);
-        break;
-    case PICA_REG_INDEX(tev_stage2.color_modifier1):
-        SyncTevModifiers(2, regs.tev_stage2);
-        break;
-    case PICA_REG_INDEX(tev_stage2.color_op):
-        SyncTevOps(2, regs.tev_stage2);
+        SyncTevConstColor(1, regs.tev_stage1); // each case must pass its own stage's registers, not stage 0's
         break;
     case PICA_REG_INDEX(tev_stage2.const_r):
-        SyncTevColor(2, regs.tev_stage2);
-        break;
-    case PICA_REG_INDEX(tev_stage2.color_scale):
-        SyncTevMultipliers(2, regs.tev_stage2);
-        break;
-
-    // TEV stage 3
-    case PICA_REG_INDEX(tev_stage3.color_source1):
-        SyncTevSources(3, regs.tev_stage3);
-        break;
-    case PICA_REG_INDEX(tev_stage3.color_modifier1):
-        SyncTevModifiers(3, regs.tev_stage3);
-        break;
-    case PICA_REG_INDEX(tev_stage3.color_op):
-        SyncTevOps(3, regs.tev_stage3);
+        SyncTevConstColor(2, regs.tev_stage2);
         break;
     case PICA_REG_INDEX(tev_stage3.const_r):
-        SyncTevColor(3, regs.tev_stage3);
-        break;
-    case PICA_REG_INDEX(tev_stage3.color_scale):
-        SyncTevMultipliers(3, regs.tev_stage3);
-        break;
-
-    // TEV stage 4
-    case PICA_REG_INDEX(tev_stage4.color_source1):
-        SyncTevSources(4, regs.tev_stage4);
-        break;
-    case PICA_REG_INDEX(tev_stage4.color_modifier1):
-        SyncTevModifiers(4, regs.tev_stage4);
-        break;
-    case PICA_REG_INDEX(tev_stage4.color_op):
-        SyncTevOps(4, regs.tev_stage4);
+        SyncTevConstColor(3, regs.tev_stage3);
         break;
     case PICA_REG_INDEX(tev_stage4.const_r):
-        SyncTevColor(4, regs.tev_stage4);
-        break;
-    case PICA_REG_INDEX(tev_stage4.color_scale):
-        SyncTevMultipliers(4, regs.tev_stage4);
-        break;
-
-    // TEV stage 5
-    case PICA_REG_INDEX(tev_stage5.color_source1):
-        SyncTevSources(5, regs.tev_stage5);
-        break;
-    case PICA_REG_INDEX(tev_stage5.color_modifier1):
-        SyncTevModifiers(5, regs.tev_stage5);
-        break;
-    case PICA_REG_INDEX(tev_stage5.color_op):
-        SyncTevOps(5, regs.tev_stage5);
+        SyncTevConstColor(4, regs.tev_stage4);
         break;
     case PICA_REG_INDEX(tev_stage5.const_r):
-        SyncTevColor(5, regs.tev_stage5);
-        break;
-    case PICA_REG_INDEX(tev_stage5.color_scale):
-        SyncTevMultipliers(5, regs.tev_stage5);
+        SyncTevConstColor(5, regs.tev_stage5);
         break;
 
     // TEV combiner buffer color
     case PICA_REG_INDEX(tev_combiner_buffer_color):
         SyncCombinerColor();
         break;
-
-    // TEV combiner buffer write flags
-    case PICA_REG_INDEX(tev_combiner_buffer_input):
-        SyncCombinerWriteFlags();
-        break;
     }
 }
 
@@ -712,9 +654,8 @@ void RasterizerOpenGL::SyncBlendColor() {
 
 void RasterizerOpenGL::SyncAlphaTest() {
     const auto& regs = Pica::g_state.regs;
-    glUniform1i(uniform_alphatest_enabled, regs.output_merger.alpha_test.enable);
-    glUniform1i(uniform_alphatest_func, (GLint)regs.output_merger.alpha_test.func.Value());
-    glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f);
+    if (current_shader->uniform_alphatest_ref != -1)
+        glUniform1f(current_shader->uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f);
 }
 
 void RasterizerOpenGL::SyncLogicOp() {
@@ -744,55 +685,17 @@ void RasterizerOpenGL::SyncDepthTest() {
     state.depth.write_mask = regs.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE;
 }
 
-void RasterizerOpenGL::SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config) {
-    GLint color_srcs[3] = { (GLint)config.color_source1.Value(),
-                            (GLint)config.color_source2.Value(),
-                            (GLint)config.color_source3.Value() };
-    GLint alpha_srcs[3] = { (GLint)config.alpha_source1.Value(),
-                            (GLint)config.alpha_source2.Value(),
-                            (GLint)config.alpha_source3.Value() };
-
-    glUniform3iv(uniform_tev_cfgs[stage_index].color_sources, 1, color_srcs);
-    glUniform3iv(uniform_tev_cfgs[stage_index].alpha_sources, 1, alpha_srcs);
-}
-
-void RasterizerOpenGL::SyncTevModifiers(unsigned stage_index, const Pica::Regs::TevStageConfig& config) {
-    GLint color_mods[3] = { (GLint)config.color_modifier1.Value(),
-                            (GLint)config.color_modifier2.Value(),
-                            (GLint)config.color_modifier3.Value() };
-    GLint alpha_mods[3] = { (GLint)config.alpha_modifier1.Value(),
-                            (GLint)config.alpha_modifier2.Value(),
-                            (GLint)config.alpha_modifier3.Value() };
-
-    glUniform3iv(uniform_tev_cfgs[stage_index].color_modifiers, 1, color_mods);
-    glUniform3iv(uniform_tev_cfgs[stage_index].alpha_modifiers, 1, alpha_mods);
-}
-
-void RasterizerOpenGL::SyncTevOps(unsigned stage_index, const Pica::Regs::TevStageConfig& config) {
-    glUniform2i(uniform_tev_cfgs[stage_index].color_alpha_op, (GLint)config.color_op.Value(), (GLint)config.alpha_op.Value());
-}
-
-void RasterizerOpenGL::SyncTevColor(unsigned stage_index, const Pica::Regs::TevStageConfig& config) {
-    auto const_color = PicaToGL::ColorRGBA8(config.const_color);
-    glUniform4fv(uniform_tev_cfgs[stage_index].const_color, 1, const_color.data());
-}
-
-void RasterizerOpenGL::SyncTevMultipliers(unsigned stage_index, const Pica::Regs::TevStageConfig& config) {
-    glUniform2i(uniform_tev_cfgs[stage_index].color_alpha_multiplier, config.GetColorMultiplier(), config.GetAlphaMultiplier());
-}
-
 void RasterizerOpenGL::SyncCombinerColor() {
-    auto combiner_color = PicaToGL::ColorRGBA8(Pica::g_state.regs.tev_combiner_buffer_color.raw);
-    glUniform4fv(uniform_tev_combiner_buffer_color, 1, combiner_color.data());
+    if (current_shader->uniform_tev_combiner_buffer_color != -1) {
+        auto combiner_color = PicaToGL::ColorRGBA8(Pica::g_state.regs.tev_combiner_buffer_color.raw);
+        glUniform4fv(current_shader->uniform_tev_combiner_buffer_color, 1, combiner_color.data());
+    }
 }
 
-void RasterizerOpenGL::SyncCombinerWriteFlags() {
-    const auto& regs = Pica::g_state.regs;
-    const auto tev_stages = regs.GetTevStages();
-    for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
-        glUniform2i(uniform_tev_cfgs[tev_stage_index].updates_combiner_buffer_color_alpha,
-                    regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index),
-                    regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index));
+void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevStageConfig& tev_stage) {
+    if (current_shader->uniform_tev_const_colors != -1) {
+        auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color);
+        glUniform4fv(current_shader->uniform_tev_const_colors + stage_index, 1, const_color.data());
     }
 }
 
@@ -824,12 +727,6 @@ void RasterizerOpenGL::SyncDrawState() {
         }
     }
 
-    // Skip processing TEV stages that simply pass the previous stage results through
-    const auto tev_stages = regs.GetTevStages();
-    for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
-        glUniform1i(uniform_tev_cfgs[tev_stage_index].enabled, !IsPassThroughTevStage(tev_stages[tev_stage_index]));
-    }
-
     state.Apply();
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 1fe307846..19e8db69a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <vector>
+#include <unordered_map>
 
 #include "common/common_types.h"
 
@@ -13,6 +14,60 @@
 #include "video_core/renderer_opengl/gl_state.h"
 #include "video_core/shader/shader_interpreter.h"
 
+template <typename T>
+inline size_t hash(const T& o) { // Shorthand for std::hash<T>()(o).
+    return std::hash<T>()(o);
+}
+
+template <typename T>
+inline size_t combine_hash(const T& o) { // Base case of the variadic overload: a single value hashes to hash(o).
+    return hash(o);
+}
+
+template <typename T, typename... Args>
+inline size_t combine_hash(const T& o, const Args&... args) { // Recursive fold: hash(first) * 3 + combine_hash(rest...).
+    return hash(o) * 3 + combine_hash(args...);
+}
+
+struct ShaderCacheKey {
+    using Regs = Pica::Regs;
+    bool operator ==(const ShaderCacheKey& o) const { // Field-wise, so two configs whose hashes collide never alias; const colors are skipped, matching the hash
+        for (size_t i = 0; i < tev_stages.size(); ++i)
+            if (tev_stages[i].source_raw != o.tev_stages[i].source_raw || tev_stages[i].modifier_raw != o.tev_stages[i].modifier_raw ||
+                tev_stages[i].op_raw != o.tev_stages[i].op_raw || tev_stages[i].scale_raw != o.tev_stages[i].scale_raw)
+                return false;
+        return alpha_test_func == o.alpha_test_func && combiner_buffer_input == o.combiner_buffer_input;
+    }
+    Regs::CompareFunc alpha_test_func; // Alpha-test comparison the shader is generated for
+    std::array<Regs::TevStageConfig, 6> tev_stages; // Per-stage TEV configuration
+    u8 combiner_buffer_input; // Low nibble: per-stage color update mask; high nibble: alpha update mask
+    bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
+        return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index));
+    }
+    bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
+        return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index));
+    }
+};
+
+namespace std {
+// Hash support so ShaderCacheKey can key an unordered_map. TEV constant colors are deliberately left out of the hash.
+template<> struct hash<::Pica::Regs::TevStageConfig> {
+    size_t operator()(const ::Pica::Regs::TevStageConfig& o) const { // const so the hasher is callable on const objects
+        return ::combine_hash(
+            ::hash(o.source_raw), ::hash(o.modifier_raw),
+            ::hash(o.op_raw), ::hash(o.scale_raw));
+    }
+};
+
+template<> struct hash<::ShaderCacheKey> {
+    size_t operator()(const ::ShaderCacheKey& o) const {
+        return ::combine_hash(static_cast<u32>(o.alpha_test_func), o.combiner_buffer_input, // enum hashed via its integer value (std::hash<enum> needs C++14)
+            o.tev_stages[0], o.tev_stages[1], o.tev_stages[2],
+            o.tev_stages[3], o.tev_stages[4], o.tev_stages[5]);
+    }
+};
+}
+
 class RasterizerOpenGL : public HWRasterizer {
 public:
 
@@ -33,6 +88,8 @@ public:
     /// Draw the current batch of triangles
     void DrawTriangles() override;
 
+    void RegenerateShaders();
+
     /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer
     void CommitFramebuffer() override;
 
@@ -59,6 +116,22 @@ private:
         GLuint updates_combiner_buffer_color_alpha;
     };
 
+    /// One cached shader program plus the uniform locations looked up for it (-1 when the program has no such uniform).
+    struct TEVShader {
+        OGLShader shader;
+        // Uniform locations are GLint: glGetUniformLocation returns a signed value and uses -1 as the "not found" sentinel
+        GLint uniform_alphatest_ref = -1;
+        GLint uniform_tex = -1;
+        GLint uniform_tev_combiner_buffer_color = -1;
+        GLint uniform_tev_const_colors = -1;
+
+        TEVShader() = default;
+        TEVShader(TEVShader&& o) : shader(std::move(o.shader)),
+            uniform_alphatest_ref(o.uniform_alphatest_ref), uniform_tex(o.uniform_tex),
+            uniform_tev_combiner_buffer_color(o.uniform_tev_combiner_buffer_color),
+            uniform_tev_const_colors(o.uniform_tev_const_colors) {}
+    };
+
     /// Structure used for storing information about color textures
     struct TextureInfo {
         OGLTexture texture;
@@ -156,27 +229,12 @@ private:
     /// Syncs the depth test states to match the PICA register
     void SyncDepthTest();
 
-    /// Syncs the specified TEV stage's color and alpha sources to match the PICA register
-    void SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config);
-
-    /// Syncs the specified TEV stage's color and alpha modifiers to match the PICA register
-    void SyncTevModifiers(unsigned stage_index, const Pica::Regs::TevStageConfig& config);
-
-    /// Syncs the specified TEV stage's color and alpha combiner operations to match the PICA register
-    void SyncTevOps(unsigned stage_index, const Pica::Regs::TevStageConfig& config);
-
-    /// Syncs the specified TEV stage's constant color to match the PICA register
-    void SyncTevColor(unsigned stage_index, const Pica::Regs::TevStageConfig& config);
-
-    /// Syncs the specified TEV stage's color and alpha multipliers to match the PICA register
-    void SyncTevMultipliers(unsigned stage_index, const Pica::Regs::TevStageConfig& config);
+    /// Syncs the TEV constant color to match the PICA register
+    void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
 
     /// Syncs the TEV combiner color buffer to match the PICA register
     void SyncCombinerColor();
 
-    /// Syncs the TEV combiner write flags to match the PICA register
-    void SyncCombinerWriteFlags();
-
     /// Syncs the remaining OpenGL drawing state to match the current PICA state
     void SyncDrawState();
 
@@ -213,21 +271,11 @@ private:
     std::array<SamplerInfo, 3> texture_samplers;
     TextureInfo fb_color_texture;
     DepthTextureInfo fb_depth_texture;
-    OGLShader shader;
+
+    std::unordered_map<ShaderCacheKey, TEVShader> shader_cache;
+    TEVShader* current_shader = nullptr;
+
     OGLVertexArray vertex_array;
     OGLBuffer vertex_buffer;
     OGLFramebuffer framebuffer;
-
-    // Hardware vertex shader
-    GLuint attrib_position;
-    GLuint attrib_color;
-    GLuint attrib_texcoords;
-
-    // Hardware fragment shader
-    GLuint uniform_alphatest_enabled;
-    GLuint uniform_alphatest_func;
-    GLuint uniform_alphatest_ref;
-    GLuint uniform_tex;
-    GLuint uniform_tev_combiner_buffer_color;
-    TEVConfigUniforms uniform_tev_cfgs[6];
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 4cf246c06..ee32f6a31 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -2,6 +2,13 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+
+#include "gl_shader_util.h"
+#include "gl_rasterizer.h"
+#include "common/logging/log.h"
+
+#include "video_core/pica.h"
+
 #include <algorithm>
 #include <vector>
 
@@ -65,6 +72,13 @@ GLuint LoadShaders(const char* vertex_shader, const char* fragment_shader) {
     GLuint program_id = glCreateProgram();
     glAttachShader(program_id, vertex_shader_id);
     glAttachShader(program_id, fragment_shader_id);
+
+    glBindAttribLocation(program_id, Attributes::ATTRIBUTE_POSITION, "vert_position");
+    glBindAttribLocation(program_id, Attributes::ATTRIBUTE_COLOR, "vert_color");
+    glBindAttribLocation(program_id, Attributes::ATTRIBUTE_TEXCOORDS + 0, "vert_texcoords0");
+    glBindAttribLocation(program_id, Attributes::ATTRIBUTE_TEXCOORDS + 1, "vert_texcoords1");
+    glBindAttribLocation(program_id, Attributes::ATTRIBUTE_TEXCOORDS + 2, "vert_texcoords2");
+
     glLinkProgram(program_id);
 
     // Check the program
@@ -88,3 +102,338 @@ GLuint LoadShaders(const char* vertex_shader, const char* fragment_shader) {
 }
 
 }
+
+namespace ShaderCache
+{
+
+static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
+    using Stage = Pica::Regs::TevStageConfig;
+    // A stage is a pass-through when it replaces its output with the unmodified, unscaled previous output
+    const bool replaces = stage.color_op == Stage::Operation::Replace && stage.alpha_op == Stage::Operation::Replace;
+    const bool reads_previous = stage.color_source1 == Stage::Source::Previous && stage.alpha_source1 == Stage::Source::Previous;
+    const bool unmodified = stage.color_modifier1 == Stage::ColorModifier::SourceColor &&
+                            stage.alpha_modifier1 == Stage::AlphaModifier::SourceAlpha;
+    const bool unscaled = stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1;
+    return replaces && reads_previous && unmodified && unscaled;
+}
+
+// Appends to `shader` the GLSL vec4 expression that reads the TEV stage input `source`.
+// `index_name` is the index of the stage being generated; it selects the const_color[] entry.
+void AppendSource(std::string& shader, Pica::Regs::TevStageConfig::Source source, const std::string& index_name) {
+    using Source = Pica::Regs::TevStageConfig::Source;
+    std::string expression;
+    switch (source) {
+    case Source::PrimaryColor:
+    case Source::PrimaryFragmentColor: // HACK: Until we implement fragment lighting, use primary_color
+        expression = "o[2]";
+        break;
+    case Source::SecondaryFragmentColor: // HACK: Until we implement fragment lighting, use zero
+        expression = "vec4(0.0, 0.0, 0.0, 0.0)";
+        break;
+    case Source::Texture0:
+        expression = "texture(tex[0], o[3].xy)";
+        break;
+    case Source::Texture1:
+        expression = "texture(tex[1], o[3].zw)";
+        break;
+    case Source::Texture2: // TODO: Unverified
+        expression = "texture(tex[2], o[5].zw)";
+        break;
+    case Source::PreviousBuffer:
+        expression = "g_combiner_buffer";
+        break;
+    case Source::Constant:
+        expression = "const_color[" + index_name + "]";
+        break;
+    case Source::Previous:
+        expression = "g_last_tex_env_out";
+        break;
+    default:
+        shader += "vec4(0.0)";
+        LOG_CRITICAL(Render_OpenGL, "Unknown source op %u", source);
+        return;
+    }
+    shader += expression;
+}
+
+// Appends to `shader` the GLSL vec3 expression for the color operand `source` of a TEV stage after
+// `modifier` has been applied: a swizzle of the source, optionally subtracted from vec3(1.0).
+void AppendColorModifier(std::string& shader, Pica::Regs::TevStageConfig::ColorModifier modifier, Pica::Regs::TevStageConfig::Source source, const std::string& index_name) {
+    using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier;
+
+    const char* swizzle = nullptr;
+    bool one_minus = false;
+    switch (modifier) {
+    case ColorModifier::SourceColor:
+        swizzle = ".rgb";
+        break;
+    case ColorModifier::OneMinusSourceColor:
+        one_minus = true;
+        swizzle = ".rgb";
+        break;
+    case ColorModifier::SourceAlpha:
+        swizzle = ".aaa";
+        break;
+    case ColorModifier::OneMinusSourceAlpha:
+        one_minus = true;
+        swizzle = ".aaa";
+        break;
+    case ColorModifier::SourceRed:
+        swizzle = ".rrr";
+        break;
+    case ColorModifier::OneMinusSourceRed:
+        one_minus = true;
+        swizzle = ".rrr";
+        break;
+    case ColorModifier::SourceGreen:
+        swizzle = ".ggg";
+        break;
+    case ColorModifier::OneMinusSourceGreen:
+        one_minus = true;
+        swizzle = ".ggg";
+        break;
+    case ColorModifier::SourceBlue:
+        swizzle = ".bbb";
+        break;
+    case ColorModifier::OneMinusSourceBlue:
+        one_minus = true;
+        swizzle = ".bbb";
+        break;
+    default:
+        shader += "vec3(0.0)";
+        LOG_CRITICAL(Render_OpenGL, "Unknown color modifier op %u", modifier);
+        return;
+    }
+
+    if (one_minus)
+        shader += "vec3(1.0) - ";
+    AppendSource(shader, source, index_name);
+    shader += swizzle;
+}
+
+// Appends to `shader` the GLSL float expression for the alpha operand `source` of a TEV stage after
+// `modifier` has been applied: one component of the source, optionally subtracted from 1.0.
+void AppendAlphaModifier(std::string& shader, Pica::Regs::TevStageConfig::AlphaModifier modifier, Pica::Regs::TevStageConfig::Source source, const std::string& index_name) {
+    using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier;
+    const char* component = nullptr;
+    bool one_minus = false;
+    switch (modifier) {
+    case AlphaModifier::SourceAlpha:
+        component = ".a";
+        break;
+    case AlphaModifier::OneMinusSourceAlpha:
+        one_minus = true;
+        component = ".a";
+        break;
+    case AlphaModifier::SourceRed:
+        component = ".r";
+        break;
+    case AlphaModifier::OneMinusSourceRed:
+        one_minus = true;
+        component = ".r";
+        break;
+    case AlphaModifier::SourceGreen:
+        component = ".g";
+        break;
+    case AlphaModifier::OneMinusSourceGreen:
+        one_minus = true;
+        component = ".g";
+        break;
+    case AlphaModifier::SourceBlue:
+        component = ".b";
+        break;
+    case AlphaModifier::OneMinusSourceBlue:
+        one_minus = true;
+        component = ".b";
+        break;
+    default:
+        shader += "0.0"; // Must be a scalar: the result is stored in an element of a GLSL float array
+        LOG_CRITICAL(Render_OpenGL, "Unknown alpha modifier op %u", modifier);
+        return;
+    }
+    if (one_minus)
+        shader += "1.0 - ";
+    AppendSource(shader, source, index_name);
+    shader += component;
+}
+
+// Appends to `shader` the GLSL vec3 expression combining a TEV stage's color operands according
+// to `operation`. The operands are elements [0], [1] and [2] of the GLSL vec3 array named
+// `variable_name`. An unknown operation is logged and yields vec3(0.0).
+void AppendColorCombiner(std::string& shader, Pica::Regs::TevStageConfig::Operation operation, const std::string& variable_name) {
+    using Operation = Pica::Regs::TevStageConfig::Operation;
+
+    const std::string a = variable_name + "[0]";
+    const std::string b = variable_name + "[1]";
+    const std::string c = variable_name + "[2]";
+
+    switch (operation) {
+    // Results emitted without clamping
+    case Operation::Replace:         shader += a; break;
+    case Operation::Modulate:        shader += a + " * " + b; break;
+    case Operation::Lerp:            shader += a + " * " + c + " + " + b + " * (vec3(1.0) - " + c + ")"; break;
+
+    // Sums clamped from above with min(..., 1.0)
+    case Operation::Add:             shader += "min(" + a + " + " + b + ", 1.0)"; break;
+    case Operation::MultiplyThenAdd: shader += "min(" + a + " * " + b + " + " + c + ", 1.0)"; break;
+    case Operation::AddThenMultiply: shader += "min(" + a + " + " + b + ", 1.0) * " + c; break;
+
+    // Difference clamped from below with max(..., 0.0)
+    case Operation::Subtract:        shader += "max(" + a + " - " + b + ", 0.0)"; break;
+
+    // Biased sum clamped to both bounds
+    case Operation::AddSigned:       shader += "clamp(" + a + " + " + b + " - vec3(0.5), 0.0, 1.0)"; break;
+
+    default:
+        // Must be a vec3: the result initializes a vec3 and GLSL does not convert float to vec3
+        shader += "vec3(0.0)";
+        LOG_CRITICAL(Render_OpenGL, "Unknown color comb op %u", operation);
+        break;
+    }
+}
+
+// Appends to `shader` the GLSL float expression combining a TEV stage's alpha operands according
+// to `operation`. The operands are elements [0], [1] and [2] of the GLSL float array named
+// `variable_name`. An unknown operation is logged and yields 0.0.
+void AppendAlphaCombiner(std::string& shader, Pica::Regs::TevStageConfig::Operation operation, const std::string& variable_name) {
+    using Operation = Pica::Regs::TevStageConfig::Operation;
+
+    const std::string a = variable_name + "[0]";
+    const std::string b = variable_name + "[1]";
+    const std::string c = variable_name + "[2]";
+
+    switch (operation) {
+    // Results emitted without clamping
+    case Operation::Replace:         shader += a; break;
+    case Operation::Modulate:        shader += a + " * " + b; break;
+    case Operation::Lerp:            shader += a + " * " + c + " + " + b + " * (1.0 - " + c + ")"; break;
+
+    // Sums clamped from above with min(..., 1.0)
+    case Operation::Add:             shader += "min(" + a + " + " + b + ", 1.0)"; break;
+    case Operation::MultiplyThenAdd: shader += "min(" + a + " * " + b + " + " + c + ", 1.0)"; break;
+    case Operation::AddThenMultiply: shader += "min(" + a + " + " + b + ", 1.0) * " + c; break;
+
+    // Difference clamped from below with max(..., 0.0)
+    case Operation::Subtract:        shader += "max(" + a + " - " + b + ", 0.0)"; break;
+
+    // Biased sum clamped to both bounds
+    case Operation::AddSigned:       shader += "clamp(" + a + " + " + b + " - 0.5, 0.0, 1.0)"; break;
+
+    default:
+        shader += "0.0";
+        LOG_CRITICAL(Render_OpenGL, "Unknown alpha combiner op %u", operation);
+        break;
+    }
+}
+
+// Appends to `shader` the GLSL boolean expression under which a fragment FAILS the alpha test
+// `func`, i.e. the negation of the comparison between the final combiner alpha and the
+// alphatest_ref uniform. An unknown function is logged and treated as never failing.
+void AppendAlphaTestCondition(std::string& shader, Pica::Regs::CompareFunc func) {
+    using CompareFunc = Pica::Regs::CompareFunc;
+
+    // Operator of the negated comparison; stays unset for the branches that return early
+    const char* negated_op = nullptr;
+    switch (func) {
+    // Conditions that do not depend on the alpha value
+    case CompareFunc::Never:
+        shader += "true";
+        return;
+    case CompareFunc::Always:
+        shader += "false";
+        return;
+    // Comparisons against the reference value, each mapped to its logical opposite
+    case CompareFunc::Equal:              negated_op = "!="; break;
+    case CompareFunc::NotEqual:           negated_op = "=="; break;
+    case CompareFunc::LessThan:           negated_op = ">="; break;
+    case CompareFunc::LessThanOrEqual:    negated_op = ">";  break;
+    case CompareFunc::GreaterThan:        negated_op = "<="; break;
+    case CompareFunc::GreaterThanOrEqual: negated_op = "<";  break;
+    default:
+        shader += "false";
+        LOG_CRITICAL(Render_OpenGL, "Unknown alpha test condition %u", func);
+        return;
+    }
+
+    shader += "g_last_tex_env_out.a ";
+    shader += negated_op;
+    shader += " alphatest_ref";
+}
+
+// Generates the GLSL source of the fragment shader that evaluates the TEV stages and the alpha
+// test described by `config`. Pass-through TEV stages do not emit any combiner code.
+std::string GenerateFragmentShader(const ShaderCacheKey& config) {
+    std::string shader = R"(
+#version 150 core
+
+#define NUM_VTX_ATTR 7
+#define NUM_TEV_STAGES 6
+
+in vec4 o[NUM_VTX_ATTR];
+out vec4 color;
+
+uniform float alphatest_ref;
+uniform vec4 const_color[NUM_TEV_STAGES];
+uniform sampler2D tex[3];
+
+uniform vec4 tev_combiner_buffer_color;
+
+void main(void) {
+    vec4 g_combiner_buffer = tev_combiner_buffer_color;
+    vec4 g_last_tex_env_out = vec4(0.0, 0.0, 0.0, 0.0);
+)";
+
+    // Do not do any sort of processing if it's obvious we're not going to pass the alpha test.
+    // main() is still open at this point, so close it to keep the generated shader well-formed.
+    if (config.alpha_test_func == Pica::Regs::CompareFunc::Never) {
+        shader += "discard;\n}";
+        return shader;
+    }
+
+    for (unsigned tev_stage_index = 0; tev_stage_index < config.tev_stages.size(); ++tev_stage_index) {
+        auto& tev_stage = config.tev_stages[tev_stage_index];
+        if (!IsPassThroughTevStage(tev_stage)) {
+            std::string index_name = std::to_string(tev_stage_index);
+            shader += "vec3 color_results_" + index_name + "[3] = vec3[3](";
+            AppendColorModifier(shader, tev_stage.color_modifier1, tev_stage.color_source1, index_name);
+            shader += ", ";
+            AppendColorModifier(shader, tev_stage.color_modifier2, tev_stage.color_source2, index_name);
+            shader += ", ";
+            AppendColorModifier(shader, tev_stage.color_modifier3, tev_stage.color_source3, index_name);
+            shader += ");\n";
+
+            shader += "vec3 color_output_" + index_name + " = ";
+            AppendColorCombiner(shader, tev_stage.color_op, "color_results_" + index_name);
+            shader += ";\n";
+
+            shader += "float alpha_results_" + index_name + "[3] = float[3](";
+            AppendAlphaModifier(shader, tev_stage.alpha_modifier1, tev_stage.alpha_source1, index_name);
+            shader += ", ";
+            AppendAlphaModifier(shader, tev_stage.alpha_modifier2, tev_stage.alpha_source2, index_name);
+            shader += ", ";
+            AppendAlphaModifier(shader, tev_stage.alpha_modifier3, tev_stage.alpha_source3, index_name);
+            shader += ");\n";
+
+            shader += "float alpha_output_" + index_name + " = ";
+            AppendAlphaCombiner(shader, tev_stage.alpha_op, "alpha_results_" + index_name);
+            shader += ";\n";
+            // Apply the stage's color and alpha multipliers, clamping each result from above at 1.0
+            shader += "g_last_tex_env_out = vec4(min(color_output_" + index_name + " * " + std::to_string(tev_stage.GetColorMultiplier()) + ".0, 1.0), min(alpha_output_" + index_name + " * " + std::to_string(tev_stage.GetAlphaMultiplier()) + ".0, 1.0));\n";
+        }
+        // The combiner buffer updates are emitted even when the stage itself is a pass-through
+        if (config.TevStageUpdatesCombinerBufferColor(tev_stage_index))
+            shader += "g_combiner_buffer.rgb = g_last_tex_env_out.rgb;\n";
+        if (config.TevStageUpdatesCombinerBufferAlpha(tev_stage_index))
+            shader += "g_combiner_buffer.a = g_last_tex_env_out.a;\n";
+    }
+
+    if (config.alpha_test_func != Pica::Regs::CompareFunc::Always) {
+        shader += "if (";
+        AppendAlphaTestCondition(shader, config.alpha_test_func);
+        shader += ") {\n discard;\n }\n";
+    }
+
+    shader += "color = g_last_tex_env_out;\n}";
+    return shader;
+}
+}
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index c9d7cc380..ca62c83ba 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -8,6 +8,12 @@
 
 namespace ShaderUtil {
 
+enum Attributes { // Vertex attribute locations that LoadShaders binds to the vertex shader inputs
+    ATTRIBUTE_POSITION  = 0, // Bound to "vert_position"
+    ATTRIBUTE_COLOR     = 1, // Bound to "vert_color"
+    ATTRIBUTE_TEXCOORDS = 2, // Base location; +0, +1 and +2 are bound to "vert_texcoords0".."vert_texcoords2"
+};
+
 GLuint LoadShaders(const char* vertex_file_path, const char* fragment_file_path);
 
 }
diff --git a/src/video_core/renderer_opengl/gl_shaders.h b/src/video_core/renderer_opengl/gl_shaders.h
index a8cb2f595..2ba2c6b0f 100644
--- a/src/video_core/renderer_opengl/gl_shaders.h
+++ b/src/video_core/renderer_opengl/gl_shaders.h
@@ -49,14 +49,16 @@ const char g_vertex_shader_hw[] = R"(
 
 in vec4 vert_position;
 in vec4 vert_color;
-in vec2 vert_texcoords[3];
+in vec2 vert_texcoords0;
+in vec2 vert_texcoords1;
+in vec2 vert_texcoords2;
 
 out vec4 o[NUM_VTX_ATTR];
 
 void main() {
     o[2] = vert_color;
-    o[3] = vec4(vert_texcoords[0].xy, vert_texcoords[1].xy);
-    o[5] = vec4(0.0, 0.0, vert_texcoords[2].xy);
+    o[3] = vec4(vert_texcoords0.xy, vert_texcoords1.xy);
+    o[5] = vec4(0.0, 0.0, vert_texcoords2.xy);
 
     gl_Position = vec4(vert_position.x, -vert_position.y, -vert_position.z, vert_position.w);
 }
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 6ecbedbb4..668b04259 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -65,6 +65,7 @@ public:
         GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
         GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
         GLuint shader_program; // GL_CURRENT_PROGRAM
+        bool shader_dirty;
     } draw;
 
     OpenGLState();