diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1318a332e..1c613df8e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -37,7 +37,8 @@ MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE), uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE), - index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE) { + index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE), + texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE) { allow_shadow = GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size && GLAD_GL_ARB_framebuffer_no_attachments; @@ -66,7 +67,8 @@ RasterizerOpenGL::RasterizerOpenGL() uniform_block_data.dirty = true; - uniform_block_data.lut_dirty.fill(true); + uniform_block_data.lighting_lut_dirty.fill(true); + uniform_block_data.lighting_lut_dirty_any = true; uniform_block_data.fog_lut_dirty = true; @@ -122,77 +124,16 @@ RasterizerOpenGL::RasterizerOpenGL() // Create render framebuffer framebuffer.Create(); - // Allocate and bind lighting lut textures - lighting_lut.Create(); - state.lighting_lut.texture_buffer = lighting_lut.handle; + // Allocate and bind texture buffer lut textures + texture_buffer_lut_rg.Create(); + texture_buffer_lut_rgba.Create(); + state.texture_buffer_lut_rg.texture_buffer = texture_buffer_lut_rg.handle; + state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle; state.Apply(); - lighting_lut_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, - sizeof(GLfloat) * 2 * 256 * Pica::LightingRegs::NumLightingSampler, nullptr, - GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::LightingLUT.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, lighting_lut_buffer.handle); - - // 
Setup the LUT for the fog - fog_lut.Create(); - state.fog_lut.texture_buffer = fog_lut.handle; - state.Apply(); - fog_lut_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::FogLUT.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, fog_lut_buffer.handle); - - // Setup the noise LUT for proctex - proctex_noise_lut.Create(); - state.proctex_noise_lut.texture_buffer = proctex_noise_lut.handle; - state.Apply(); - proctex_noise_lut_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, proctex_noise_lut_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_noise_lut_buffer.handle); - - // Setup the color map for proctex - proctex_color_map.Create(); - state.proctex_color_map.texture_buffer = proctex_color_map.handle; - state.Apply(); - proctex_color_map_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, proctex_color_map_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::ProcTexColorMap.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_color_map_buffer.handle); - - // Setup the alpha map for proctex - proctex_alpha_map.Create(); - state.proctex_alpha_map.texture_buffer = proctex_alpha_map.handle; - state.Apply(); - proctex_alpha_map_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, proctex_alpha_map_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_alpha_map_buffer.handle); - - // Setup the LUT for proctex - proctex_lut.Create(); - state.proctex_lut.texture_buffer = proctex_lut.handle; - state.Apply(); - 
proctex_lut_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, proctex_lut_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 4 * 256, nullptr, GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::ProcTexLUT.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_lut_buffer.handle); - - // Setup the difference LUT for proctex - proctex_diff_lut.Create(); - state.proctex_diff_lut.texture_buffer = proctex_diff_lut.handle; - state.Apply(); - proctex_diff_lut_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, proctex_diff_lut_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 4 * 256, nullptr, GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); + glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle()); + glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle()); // Bind index buffer for hardware shader path state.draw.vertex_array = hw_vao.handle; @@ -803,49 +744,8 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { shader_dirty = false; } - // Sync the lighting luts - for (unsigned index = 0; index < uniform_block_data.lut_dirty.size(); index++) { - if (uniform_block_data.lut_dirty[index]) { - SyncLightingLUT(index); - uniform_block_data.lut_dirty[index] = false; - } - } - - // Sync the fog lut - if (uniform_block_data.fog_lut_dirty) { - SyncFogLUT(); - uniform_block_data.fog_lut_dirty = false; - } - - // Sync the proctex noise lut - if (uniform_block_data.proctex_noise_lut_dirty) { - SyncProcTexNoiseLUT(); - uniform_block_data.proctex_noise_lut_dirty = false; - } - - // Sync the proctex color map - if (uniform_block_data.proctex_color_map_dirty) { - SyncProcTexColorMap(); - uniform_block_data.proctex_color_map_dirty = false; - } - - // Sync the proctex alpha map - if 
(uniform_block_data.proctex_alpha_map_dirty) { - SyncProcTexAlphaMap(); - uniform_block_data.proctex_alpha_map_dirty = false; - } - - // Sync the proctex lut - if (uniform_block_data.proctex_lut_dirty) { - SyncProcTexLUT(); - uniform_block_data.proctex_lut_dirty = false; - } - - // Sync the proctex difference lut - if (uniform_block_data.proctex_diff_lut_dirty) { - SyncProcTexDiffLUT(); - uniform_block_data.proctex_diff_lut_dirty = false; - } + // Sync the LUTs within the texture buffer + SyncAndUploadLUTs(); // Sync the uniform data const bool use_gs = regs.pipeline.use_gs == Pica::PipelineRegs::UseGS::Yes; @@ -1408,7 +1308,8 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): { auto& lut_config = regs.lighting.lut_config; - uniform_block_data.lut_dirty[lut_config.type] = true; + uniform_block_data.lighting_lut_dirty[lut_config.type] = true; + uniform_block_data.lighting_lut_dirty_any = true; break; } } @@ -1763,21 +1664,6 @@ void RasterizerOpenGL::SyncFogColor() { uniform_block_data.dirty = true; } -void RasterizerOpenGL::SyncFogLUT() { - std::array<GLvec2, 128> new_data; - - std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), - [](const auto& entry) { - return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; - }); - - if (new_data != fog_lut_data) { - fog_lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), new_data.data()); - } -} - void RasterizerOpenGL::SyncProcTexNoise() { const auto& regs = Pica::g_state.regs.texturing; uniform_block_data.data.proctex_noise_f = { @@ -1796,70 +1682,6 @@ void RasterizerOpenGL::SyncProcTexNoise() { uniform_block_data.dirty = true; } -// helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap -static void SyncProcTexValueLUT(const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut, - std::array<GLvec2, 128>& lut_data,
GLuint buffer) { - std::array<GLvec2, 128> new_data; - std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { - return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; - }); - - if (new_data != lut_data) { - lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, buffer); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), new_data.data()); - } -} - -void RasterizerOpenGL::SyncProcTexNoiseLUT() { - SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, - proctex_noise_lut_buffer.handle); -} - -void RasterizerOpenGL::SyncProcTexColorMap() { - SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, - proctex_color_map_buffer.handle); -} - -void RasterizerOpenGL::SyncProcTexAlphaMap() { - SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, - proctex_alpha_map_buffer.handle); -} - -void RasterizerOpenGL::SyncProcTexLUT() { - std::array<GLvec4, 256> new_data; - - std::transform(Pica::g_state.proctex.color_table.begin(), - Pica::g_state.proctex.color_table.end(), new_data.begin(), - [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); - - if (new_data != proctex_lut_data) { - proctex_lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, proctex_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec4), new_data.data()); - } -} - -void RasterizerOpenGL::SyncProcTexDiffLUT() { - std::array<GLvec4, 256> new_data; - - std::transform(Pica::g_state.proctex.color_diff_table.begin(), - Pica::g_state.proctex.color_diff_table.end(), new_data.begin(), - [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); - - if (new_data != proctex_diff_lut_data) { - proctex_diff_lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, proctex_diff_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() *
sizeof(GLvec4), new_data.data()); - } -} - void RasterizerOpenGL::SyncAlphaTest() { const auto& regs = Pica::g_state.regs; if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) { @@ -1957,21 +1779,6 @@ void RasterizerOpenGL::SyncGlobalAmbient() { } } -void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { - std::array<GLvec2, 256> new_data; - const auto& source_lut = Pica::g_state.lighting.luts[lut_index]; - std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), [](const auto& entry) { - return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; - }); - - if (new_data != lighting_lut_data[lut_index]) { - lighting_lut_data[lut_index] = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, lut_index * new_data.size() * sizeof(GLvec2), - new_data.size() * sizeof(GLvec2), new_data.data()); - } -} - void RasterizerOpenGL::SyncLightSpecular0(int light_index) { auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); if (color != uniform_block_data.data.light_src[light_index].specular_0) { @@ -2062,6 +1869,158 @@ void RasterizerOpenGL::SyncShadowBias() { } } +void RasterizerOpenGL::SyncAndUploadLUTs() { + constexpr size_t max_size = sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler + + sizeof(GLvec2) * 128 + // fog + sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha + sizeof(GLvec4) * 256 + // proctex + sizeof(GLvec4) * 256; // proctex diff + + if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty && + !uniform_block_data.proctex_noise_lut_dirty && + !uniform_block_data.proctex_color_map_dirty && + !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty && + !uniform_block_data.proctex_diff_lut_dirty) { + return; + } + + u8* buffer; + GLintptr offset; + bool invalidate; + size_t bytes_used = 0; + glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle()); +
std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4)); + + // Sync the lighting luts + if (uniform_block_data.lighting_lut_dirty_any || invalidate) { + for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { + if (uniform_block_data.lighting_lut_dirty[index] || invalidate) { + std::array<GLvec2, 256> new_data; + const auto& source_lut = Pica::g_state.lighting.luts[index]; + std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), + [](const auto& entry) { + return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lighting_lut_data[index] || invalidate) { + lighting_lut_data[index] = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(GLvec2)); + uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = + (offset + bytes_used) / sizeof(GLvec2); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(GLvec2); + } + uniform_block_data.lighting_lut_dirty[index] = false; + } + } + } + uniform_block_data.lighting_lut_dirty_any = false; + + // Sync the fog lut + if (uniform_block_data.fog_lut_dirty || invalidate) { + std::array<GLvec2, 128> new_data; + + std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), + [](const auto& entry) { + return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != fog_lut_data || invalidate) { + fog_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2)); + uniform_block_data.data.fog_lut_offset = (offset + bytes_used) / sizeof(GLvec2); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(GLvec2); + } + uniform_block_data.fog_lut_dirty = false; + } + + // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap + auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used]( + const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut, + std::array<GLvec2, 128>& lut_data, GLint& lut_offset) { + std::array<GLvec2, 128>
new_data; + std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { + return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lut_data || invalidate) { + lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2)); + lut_offset = (offset + bytes_used) / sizeof(GLvec2); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(GLvec2); + } + }; + + // Sync the proctex noise lut + if (uniform_block_data.proctex_noise_lut_dirty || invalidate) { + SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, + uniform_block_data.data.proctex_noise_lut_offset); + uniform_block_data.proctex_noise_lut_dirty = false; + } + + // Sync the proctex color map + if (uniform_block_data.proctex_color_map_dirty || invalidate) { + SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, + uniform_block_data.data.proctex_color_map_offset); + uniform_block_data.proctex_color_map_dirty = false; + } + + // Sync the proctex alpha map + if (uniform_block_data.proctex_alpha_map_dirty || invalidate) { + SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, + uniform_block_data.data.proctex_alpha_map_offset); + uniform_block_data.proctex_alpha_map_dirty = false; + } + + // Sync the proctex lut + if (uniform_block_data.proctex_lut_dirty || invalidate) { + std::array<GLvec4, 256> new_data; + + std::transform(Pica::g_state.proctex.color_table.begin(), + Pica::g_state.proctex.color_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_lut_data || invalidate) { + proctex_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4)); + uniform_block_data.data.proctex_lut_offset = (offset + bytes_used) / sizeof(GLvec4); + uniform_block_data.dirty =
true; + bytes_used += new_data.size() * sizeof(GLvec4); + } + uniform_block_data.proctex_lut_dirty = false; + } + + // Sync the proctex difference lut + if (uniform_block_data.proctex_diff_lut_dirty || invalidate) { + std::array<GLvec4, 256> new_data; + + std::transform(Pica::g_state.proctex.color_diff_table.begin(), + Pica::g_state.proctex.color_diff_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_diff_lut_data || invalidate) { + proctex_diff_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4)); + uniform_block_data.data.proctex_diff_lut_offset = + (offset + bytes_used) / sizeof(GLvec4); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(GLvec4); + } + uniform_block_data.proctex_diff_lut_dirty = false; + } + + texture_buffer.Unmap(bytes_used); +} + void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) { // glBindBufferRange below also changes the generic buffer binding point, so we sync the state // first diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 34058796b..2753ddb79 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -148,18 +148,10 @@ private: /// Syncs the fog states to match the PICA register void SyncFogColor(); - void SyncFogLUT(); /// Sync the procedural texture noise configuration to match the PICA register void SyncProcTexNoise(); - /// Sync the procedural texture lookup tables - void SyncProcTexNoiseLUT(); - void SyncProcTexColorMap(); - void SyncProcTexAlphaMap(); - void SyncProcTexLUT(); - void SyncProcTexDiffLUT(); - /// Syncs the alpha test states to match the PICA register void SyncAlphaTest(); @@ -190,9 +182,6 @@ private: /// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient(); - /// Syncs the lighting lookup tables - void SyncLightingLUT(unsigned index); - /// Syncs the specified light's specular 0 color to match the PICA register void SyncLightSpecular0(int light_index); @@ -220,6 +209,9 @@ private: /// Syncs the shadow rendering bias to match the PICA register void SyncShadowBias(); + /// Syncs and uploads the lighting, fog and proctex LUTs + void SyncAndUploadLUTs(); + /// Upload the uniform blocks to the uniform buffer object void UploadUniforms(bool accelerate_draw, bool use_gs); @@ -258,7 +250,8 @@ private: struct { UniformData data; - std::array<bool, Pica::LightingRegs::NumLightingSampler> lut_dirty; + std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty; + bool lighting_lut_dirty_any; bool fog_lut_dirty; bool proctex_noise_lut_dirty; bool proctex_color_map_dirty; @@ -274,6 +267,7 @@ private: static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024; static constexpr size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024; static constexpr size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; + static constexpr size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024; OGLVertexArray sw_vao; // VAO for software shader draw OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw @@ -283,6 +277,7 @@ private: OGLStreamBuffer vertex_buffer; OGLStreamBuffer uniform_buffer; OGLStreamBuffer index_buffer; + OGLStreamBuffer texture_buffer; OGLFramebuffer framebuffer; GLint uniform_buffer_alignment; size_t uniform_size_aligned_vs; @@ -291,32 +286,15 @@ private: SamplerInfo texture_cube_sampler; - OGLBuffer lighting_lut_buffer; - OGLTexture lighting_lut; + OGLTexture texture_buffer_lut_rg; + OGLTexture texture_buffer_lut_rgba; + std::array<std::array<GLvec2, 256>, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{}; - - OGLBuffer fog_lut_buffer; - OGLTexture fog_lut; std::array<GLvec2, 128> fog_lut_data{}; - - OGLBuffer proctex_noise_lut_buffer; - OGLTexture proctex_noise_lut; std::array<GLvec2, 128> proctex_noise_lut_data{}; - - OGLBuffer proctex_color_map_buffer; - OGLTexture proctex_color_map; std::array<GLvec2, 128> proctex_color_map_data{}; - -
OGLBuffer proctex_alpha_map_buffer; - OGLTexture proctex_alpha_map; std::array<GLvec2, 128> proctex_alpha_map_data{}; - - OGLBuffer proctex_lut_buffer; - OGLTexture proctex_lut; std::array<GLvec4, 256> proctex_lut_data{}; - - OGLBuffer proctex_diff_lut_buffer; - OGLTexture proctex_diff_lut; std::array<GLvec4, 256> proctex_diff_lut_data{}; bool allow_shadow; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 0ae9794de..db61dde21 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -32,6 +32,7 @@ namespace GLShader { static const std::string UniformBlockDef = R"( #define NUM_TEV_STAGES 6 #define NUM_LIGHTS 8 +#define NUM_LIGHTING_SAMPLERS 24 struct LightSrc { vec3 specular_0; @@ -55,6 +56,13 @@ layout (std140) uniform shader_data { int scissor_y1; int scissor_x2; int scissor_y2; + int fog_lut_offset; + int proctex_noise_lut_offset; + int proctex_color_map_offset; + int proctex_alpha_map_offset; + int proctex_lut_offset; + int proctex_diff_lut_offset; + ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4]; vec3 fog_color; vec2 proctex_noise_f; vec2 proctex_noise_a; @@ -1017,7 +1025,7 @@ void AppendProcTexClamp(std::string& out, const std::string& var, ProcTexClamp m } void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, - const std::string& map_lut) { + const std::string& offset) { std::string combined; switch (combiner) { case ProcTexCombiner::U: @@ -1055,7 +1063,7 @@ void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, combined = "0.0"; break; } - out += "ProcTexLookupLUT(" + map_lut + ", " + combined + ")"; + out += "ProcTexLookupLUT(" + offset + ", " + combined + ")"; } void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { @@ -1064,12 +1072,12 @@ void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { // coord=1.0 is lut[127]+lut_diff[127].
For other indices, the result is interpolated using // value entries and difference entries. out += R"( -float ProcTexLookupLUT(samplerBuffer lut, float coord) { +float ProcTexLookupLUT(int offset, float coord) { coord *= 128; float index_i = clamp(floor(coord), 0.0, 127.0); float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be // extracted as index_i = 127.0 and index_f = 1.0 - vec2 entry = texelFetch(lut, int(index_i)).rg; + vec2 entry = texelFetch(texture_buffer_lut_rg, int(index_i) + offset).rg; return clamp(entry.r + entry.g * index_f, 0.0, 1.0); } )"; @@ -1105,8 +1113,8 @@ float ProcTexNoiseCoef(vec2 x) { float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); - float x_noise = ProcTexLookupLUT(proctex_noise_lut, frac.x); - float y_noise = ProcTexLookupLUT(proctex_noise_lut, frac.y); + float x_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.x); + float y_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.y); float x0 = mix(g0, g1, x_noise); float x1 = mix(g2, g3, x_noise); return mix(x0, x1, y_noise); @@ -1148,7 +1156,8 @@ float ProcTexNoiseCoef(vec2 x) { // Combine and map out += "float lut_coord = "; - AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map"); + AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, + "proctex_color_map_offset"); out += ";\n"; // Look up color @@ -1162,14 +1171,17 @@ float ProcTexNoiseCoef(vec2 x) { out += "int lut_index_i = int(lut_coord) + " + std::to_string(config.state.proctex.lut_offset) + ";\n"; out += "float lut_index_f = fract(lut_coord);\n"; - out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i) + lut_index_f * " - "texelFetch(proctex_diff_lut, lut_index_i);\n"; + out += "vec4 final_color = texelFetch(texture_buffer_lut_rgba, lut_index_i + " + "proctex_lut_offset) + " + "lut_index_f * " + 
"texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n"; break; case ProcTexFilter::Nearest: case ProcTexFilter::NearestMipmapLinear: case ProcTexFilter::NearestMipmapNearest: out += "lut_coord += " + std::to_string(config.state.proctex.lut_offset) + ";\n"; - out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)));\n"; + out += "vec4 final_color = texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + " + "proctex_lut_offset);\n"; break; } @@ -1177,7 +1189,8 @@ float ProcTexNoiseCoef(vec2 x) { // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It // uses the output of CombineAndMap directly instead. out += "float final_alpha = "; - AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, "proctex_alpha_map"); + AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, + "proctex_alpha_map_offset"); out += ";\n"; out += "return vec4(final_color.xyz, final_alpha);\n}\n"; } else { @@ -1210,13 +1223,8 @@ uniform sampler2D tex0; uniform sampler2D tex1; uniform sampler2D tex2; uniform samplerCube tex_cube; -uniform samplerBuffer lighting_lut; -uniform samplerBuffer fog_lut; -uniform samplerBuffer proctex_noise_lut; -uniform samplerBuffer proctex_color_map; -uniform samplerBuffer proctex_alpha_map; -uniform samplerBuffer proctex_lut; -uniform samplerBuffer proctex_diff_lut; +uniform samplerBuffer texture_buffer_lut_rg; +uniform samplerBuffer texture_buffer_lut_rgba; #if ALLOW_SHADOW layout(r32ui) uniform readonly uimage2D shadow_texture_px; @@ -1238,7 +1246,7 @@ vec3 quaternion_rotate(vec4 q, vec3 v) { } float LookupLightingLUT(int lut_index, int index, float delta) { - vec2 entry = texelFetch(lighting_lut, lut_index * 256 + index).rg; + vec2 entry = texelFetch(texture_buffer_lut_rg, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; return entry.r + entry.g * delta; } @@ -1481,7 +1489,8 @@ vec4 secondary_fragment_color = vec4(0.0); // Generate clamped fog 
factor from LUT for given fog index out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"; out += "float fog_f = fog_index - fog_i;\n"; - out += "vec2 fog_lut_entry = texelFetch(fog_lut, int(fog_i)).rg;\n"; + out += "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_rg, int(fog_i) + " + "fog_lut_offset).rg;\n"; out += "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n"; out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index e70b5d87a..3b3faea9a 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -55,13 +55,8 @@ static void SetShaderSamplerBindings(GLuint shader) { SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube); // Set the texture samplers to correspond to different lookup table texture units - SetShaderSamplerBinding(shader, "lighting_lut", TextureUnits::LightingLUT); - SetShaderSamplerBinding(shader, "fog_lut", TextureUnits::FogLUT); - SetShaderSamplerBinding(shader, "proctex_noise_lut", TextureUnits::ProcTexNoiseLUT); - SetShaderSamplerBinding(shader, "proctex_color_map", TextureUnits::ProcTexColorMap); - SetShaderSamplerBinding(shader, "proctex_alpha_map", TextureUnits::ProcTexAlphaMap); - SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT); - SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT); + SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG); + SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA); SetShaderImageBinding(shader, "shadow_buffer", ImageUnits::ShadowBuffer); SetShaderImageBinding(shader, "shadow_texture_px", ImageUnits::ShadowTexturePX); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 
73acc3297..3233f99e7 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -6,6 +6,7 @@ #include #include +#include "video_core/regs_lighting.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/pica_to_gl.h" @@ -38,6 +39,13 @@ struct UniformData { GLint scissor_y1; GLint scissor_x2; GLint scissor_y2; + GLint fog_lut_offset; + GLint proctex_noise_lut_offset; + GLint proctex_color_map_offset; + GLint proctex_alpha_map_offset; + GLint proctex_lut_offset; + GLint proctex_diff_lut_offset; + alignas(16) GLivec4 lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4]; alignas(16) GLvec3 fog_color; alignas(8) GLvec2 proctex_noise_f; alignas(8) GLvec2 proctex_noise_a; @@ -50,7 +58,7 @@ struct UniformData { }; static_assert( - sizeof(UniformData) == 0x470, + sizeof(UniformData) == 0x4e0, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 95dbd591b..0d41242ee 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -55,15 +55,8 @@ OpenGLState::OpenGLState() { texture_cube_unit.texture_cube = 0; texture_cube_unit.sampler = 0; - lighting_lut.texture_buffer = 0; - - fog_lut.texture_buffer = 0; - - proctex_lut.texture_buffer = 0; - proctex_diff_lut.texture_buffer = 0; - proctex_color_map.texture_buffer = 0; - proctex_alpha_map.texture_buffer = 0; - proctex_noise_lut.texture_buffer = 0; + texture_buffer_lut_rg.texture_buffer = 0; + texture_buffer_lut_rgba.texture_buffer = 0; image_shadow_buffer = 0; image_shadow_texture_px = 0; @@ -221,46 +214,17 @@ void OpenGLState::Apply() const { 
glBindSampler(TextureUnits::TextureCube.id, texture_cube_unit.sampler); } - // Lighting LUTs - if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { - glActiveTexture(TextureUnits::LightingLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer); + // Texture buffer LUTs + if (texture_buffer_lut_rg.texture_buffer != cur_state.texture_buffer_lut_rg.texture_buffer) { + glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum()); + glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rg.texture_buffer); } - // Fog LUT - if (fog_lut.texture_buffer != cur_state.fog_lut.texture_buffer) { - glActiveTexture(TextureUnits::FogLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, fog_lut.texture_buffer); - } - - // ProcTex Noise LUT - if (proctex_noise_lut.texture_buffer != cur_state.proctex_noise_lut.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_noise_lut.texture_buffer); - } - - // ProcTex Color Map - if (proctex_color_map.texture_buffer != cur_state.proctex_color_map.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexColorMap.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_color_map.texture_buffer); - } - - // ProcTex Alpha Map - if (proctex_alpha_map.texture_buffer != cur_state.proctex_alpha_map.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_alpha_map.texture_buffer); - } - - // ProcTex LUT - if (proctex_lut.texture_buffer != cur_state.proctex_lut.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_lut.texture_buffer); - } - - // ProcTex Diff LUT - if (proctex_diff_lut.texture_buffer != cur_state.proctex_diff_lut.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer); + // Texture buffer LUTs + if (texture_buffer_lut_rgba.texture_buffer != + 
cur_state.texture_buffer_lut_rgba.texture_buffer) { + glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); + glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rgba.texture_buffer); } // Shadow Images @@ -374,20 +338,10 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) { } if (texture_cube_unit.texture_cube == handle) texture_cube_unit.texture_cube = 0; - if (lighting_lut.texture_buffer == handle) - lighting_lut.texture_buffer = 0; - if (fog_lut.texture_buffer == handle) - fog_lut.texture_buffer = 0; - if (proctex_noise_lut.texture_buffer == handle) - proctex_noise_lut.texture_buffer = 0; - if (proctex_color_map.texture_buffer == handle) - proctex_color_map.texture_buffer = 0; - if (proctex_alpha_map.texture_buffer == handle) - proctex_alpha_map.texture_buffer = 0; - if (proctex_lut.texture_buffer == handle) - proctex_lut.texture_buffer = 0; - if (proctex_diff_lut.texture_buffer == handle) - proctex_diff_lut.texture_buffer = 0; + if (texture_buffer_lut_rg.texture_buffer == handle) + texture_buffer_lut_rg.texture_buffer = 0; + if (texture_buffer_lut_rgba.texture_buffer == handle) + texture_buffer_lut_rgba.texture_buffer = 0; if (image_shadow_buffer == handle) image_shadow_buffer = 0; if (image_shadow_texture_px == handle) diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index ebc217349..1cf9b8d36 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -20,14 +20,9 @@ constexpr TextureUnit PicaTexture(int unit) { return TextureUnit{unit}; } -constexpr TextureUnit LightingLUT{3}; -constexpr TextureUnit FogLUT{4}; -constexpr TextureUnit ProcTexNoiseLUT{5}; -constexpr TextureUnit ProcTexColorMap{6}; -constexpr TextureUnit ProcTexAlphaMap{7}; -constexpr TextureUnit ProcTexLUT{8}; -constexpr TextureUnit ProcTexDiffLUT{9}; -constexpr TextureUnit TextureCube{10}; +constexpr TextureUnit TextureCube{3}; +constexpr TextureUnit TextureBufferLUT_RG{4}; +constexpr 
TextureUnit TextureBufferLUT_RGBA{5}; } // namespace TextureUnits @@ -105,31 +100,11 @@ public: struct { GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } lighting_lut; + } texture_buffer_lut_rg; struct { GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } fog_lut; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_noise_lut; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_color_map; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_alpha_map; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_lut; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_diff_lut; + } texture_buffer_lut_rgba; // GL_IMAGE_BINDING_NAME GLuint image_shadow_buffer; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 1957cfbcc..03a8ed8b7 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -87,7 +87,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a void OGLStreamBuffer::Unmap(GLsizeiptr size) { ASSERT(size <= mapped_size); - if (!coherent) { + if (!coherent && size > 0) { glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size); } diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 45d4bc4bb..faada1556 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -23,6 +23,10 @@ using GLuvec2 = std::array<GLuint, 2>; using GLuvec3 = std::array<GLuint, 3>; using GLuvec4 = std::array<GLuint, 4>; +using GLivec2 = std::array<GLint, 2>; +using GLivec3 = std::array<GLint, 3>; +using GLivec4 = std::array<GLint, 4>; + namespace PicaToGL { inline GLenum TextureFilterMode(Pica::TexturingRegs::TextureConfig::TextureFilter mode) {