From 5960282303e3b5883b14a0174dcd0af3646e2f16 Mon Sep 17 00:00:00 2001
From: Markus Wick <markus@selfnet.de>
Date: Wed, 2 May 2018 09:34:36 +0200
Subject: [PATCH] gl_rasterizer: Use buffer_storage for uniform data.

This replaces the glBufferData logic with the shared stream buffer code.
The new code doesn't need a temporary staging buffer any more, so the
performance should improve quite a bit.
---
 .../renderer_opengl/gl_rasterizer.cpp         | 35 +++++++++++++------
 .../renderer_opengl/gl_rasterizer.h           | 12 +++++--
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7c44a77cb..2e738694e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -31,7 +31,8 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
 MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
 
 RasterizerOpenGL::RasterizerOpenGL()
-    : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE) {
+    : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE),
+      uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE) {
     // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
     state.clip_distance[0] = true;
 
@@ -48,16 +49,12 @@ RasterizerOpenGL::RasterizerOpenGL()
 
     // Generate VBO, VAO and UBO
     vertex_array.Create();
-    uniform_buffer.Create();
 
     state.draw.vertex_array = vertex_array.handle;
     state.draw.vertex_buffer = vertex_buffer.GetHandle();
-    state.draw.uniform_buffer = uniform_buffer.handle;
+    state.draw.uniform_buffer = uniform_buffer.GetHandle();
     state.Apply();
 
-    // Bind the UBO to binding point 0
-    glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle);
-
     uniform_block_data.dirty = true;
 
     uniform_block_data.lut_dirty.fill(true);
@@ -70,6 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL()
     uniform_block_data.proctex_lut_dirty = true;
     uniform_block_data.proctex_diff_lut_dirty = true;
 
+    glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
+    uniform_size_aligned_fs =
+        Common::AlignUp<size_t>(sizeof(UniformData), uniform_buffer_alignment);
+
     // Set vertex attributes
     glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE,
                           sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
@@ -475,11 +476,7 @@ void RasterizerOpenGL::DrawTriangles() {
     }
 
     // Sync the uniform data
-    if (uniform_block_data.dirty) {
-        glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data,
-                     GL_STATIC_DRAW);
-        uniform_block_data.dirty = false;
-    }
+    UploadUniforms();
 
     // Viewport can have negative offsets or larger
     // dimensions than our framebuffer sub-rect.
@@ -1652,3 +1649,19 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
         uniform_block_data.dirty = true;
     }
 }
+
+void RasterizerOpenGL::UploadUniforms() {
+    if (!uniform_block_data.dirty)
+        return; // Uniform data unchanged since the last upload; keep the current binding
+    // Reserve sizeof(UniformData) rounded up to GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT (see ctor).
+    size_t uniform_size = uniform_size_aligned_fs;
+    u8* uniforms;
+    GLintptr offset; // Passed to glBindBufferRange below as the start of the bound range
+    std::tie(uniforms, offset, std::ignore) =
+        uniform_buffer.Map(uniform_size, uniform_buffer_alignment);
+    std::memcpy(uniforms, &uniform_block_data.data, sizeof(UniformData)); // Rest is padding
+    uniform_buffer.Unmap(uniform_size);
+    glBindBufferRange(GL_UNIFORM_BUFFER, 0, uniform_buffer.GetHandle(), offset,
+                      sizeof(UniformData)); // Point UBO binding 0 at the block just written
+    uniform_block_data.dirty = false;
+}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 02771a189..c02d3ece7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -215,6 +215,9 @@ private:
     /// Syncs the specified light's distance attenuation scale to match the PICA register
     void SyncLightDistanceAttenuationScale(int light_index);
 
+    /// Upload the uniform blocks to the uniform buffer object
+    void UploadUniforms();
+
     OpenGLState state;
 
     RasterizerCacheOpenGL res_cache;
@@ -237,12 +240,17 @@ private:
 
     std::unique_ptr<ShaderProgramManager> shader_program_manager;
 
+    // Stream buffer sizes; each shall be big enough for about one frame.
+    static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
+    static constexpr size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
+
     std::array<SamplerInfo, 3> texture_samplers;
     OGLVertexArray vertex_array;
-    static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
     OGLStreamBuffer vertex_buffer;
-    OGLBuffer uniform_buffer;
+    OGLStreamBuffer uniform_buffer;
     OGLFramebuffer framebuffer;
+    GLint uniform_buffer_alignment;
+    size_t uniform_size_aligned_fs;
 
     // TODO (wwylele): consider caching texture cube in the rasterizer cache
     OGLTexture texture_cube;