From 61ffb0fb280fd5c4f518aa600f16366b355855ea Mon Sep 17 00:00:00 2001 From: Nicolas Hake Date: Tue, 17 Mar 2015 22:54:22 +0100 Subject: [PATCH] gl: Use fewer uniform components to upload bones (#1285) The last row of the bone transformation matrix is always 0,0,0,1, so there's no point in uploading it. Also reduce the max bone count from 128 to 80, which means the uniform array will fit into the available space on GeForce 6000 and 7000 series GPUs. If we're short on uniform components, don't transpose the transformation matrix before sending it to the shader, and transpose it in the shader itself instead, saving 4 components per bone. --- planet/Graphics.ocg/ObjectDefaultVS.glsl | 24 ++++++++++++++++++++---- src/graphics/C4DrawGL.cpp | 10 ++++++++++ src/graphics/C4DrawGL.h | 5 +++++ src/graphics/C4DrawMeshGL.cpp | 21 +++++++++++++-------- src/graphics/C4Shader.h | 6 ++++++ 5 files changed, 54 insertions(+), 12 deletions(-) diff --git a/planet/Graphics.ocg/ObjectDefaultVS.glsl b/planet/Graphics.ocg/ObjectDefaultVS.glsl index 045ca6102..1a9213785 100644 --- a/planet/Graphics.ocg/ObjectDefaultVS.glsl +++ b/planet/Graphics.ocg/ObjectDefaultVS.glsl @@ -15,9 +15,11 @@ // Default Vertex Shader for mesh-based objects. -// Input uniforms: +// Input uniforms, if OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS is NOT defined: // bones: array of 4x3 bone transformation matrices. +// Input uniforms, if OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS is defined: +// bones: array of 3x4 transposed bone transformation matrices. // Input vertex attributes: // oc_BoneWeights0 and oc_BoneWeight1: vectors of bone influence weights. @@ -32,8 +34,15 @@ // inside the bone matrix array that contains the identity matrix, with a bone // weight of 1.0. 
+#define MAX_BONE_COUNT 80 + varying vec3 normalDir; -uniform mat4 bones[128]; + +#ifndef OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS +uniform mat4x3 bones[MAX_BONE_COUNT]; +#else +uniform mat3x4 bones[MAX_BONE_COUNT]; +#endif // For more performance, this should be set by the engine, and this shader // should be compiled three times: with BONE_COUNT set to 0, 4, and 8, @@ -48,10 +57,17 @@ attribute vec4 oc_BoneIndices1; attribute vec4 oc_BoneWeights1; #endif -vec4 merge_bone(vec4 vertex, vec4 original, mat4 bone, float weight) +#ifndef OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS +vec4 merge_bone(vec4 vertex, vec4 original, mat4x3 bone, float weight) { - return (bone * original) * weight + vertex; + return (mat4(bone) * original) * weight + vertex; } +#else +vec4 merge_bone(vec4 vertex, vec4 original, mat3x4 bone, float weight) +{ + return (mat4(transpose(bone)) * original) * weight + vertex; +} +#endif slice(position) { diff --git a/src/graphics/C4DrawGL.cpp b/src/graphics/C4DrawGL.cpp index f0c2b9cd3..83c296174 100644 --- a/src/graphics/C4DrawGL.cpp +++ b/src/graphics/C4DrawGL.cpp @@ -260,6 +260,15 @@ CStdGLCtx *CStdGL::CreateContext(C4Window * pWindow, C4AbstractApp *pApp) LogSilentF("GLExt: %s", gl_extensions ? 
gl_extensions : ""); } } + + // Check which workarounds we have to apply + { + // If we have less than 2048 uniform components available, we + // need to upload bone matrices in a different way + GLint count; + glGetIntegerv(GL_MAX_VERTEX_UNIFORM_COMPONENTS, &count); + Workarounds.LowMaxVertexUniformCount = count < 2048; + } } if (!success) { @@ -820,6 +829,7 @@ void CStdGL::Default() iPixelFormat=0; sfcFmt=0; iClrDpt=0; + Workarounds.LowMaxVertexUniformCount = false; } #endif // USE_CONSOLE diff --git a/src/graphics/C4DrawGL.h b/src/graphics/C4DrawGL.h index 700ab9d5a..6c9834db4 100644 --- a/src/graphics/C4DrawGL.h +++ b/src/graphics/C4DrawGL.h @@ -184,6 +184,11 @@ public: C4Shader* GetSpriteShader(int ssc); C4Shader* GetSpriteShader(bool haveBase, bool haveOverlay, bool haveNormal); + struct + { + bool LowMaxVertexUniformCount; + } Workarounds; + protected: bool CreatePrimarySurfaces(unsigned int iXRes, unsigned int iYRes, int iColorDepth, unsigned int iMonitor); diff --git a/src/graphics/C4DrawMeshGL.cpp b/src/graphics/C4DrawMeshGL.cpp index 32bc21946..c34e344e7 100644 --- a/src/graphics/C4DrawMeshGL.cpp +++ b/src/graphics/C4DrawMeshGL.cpp @@ -127,7 +127,11 @@ namespace "}\n" ); } - return buf; + + if (pGL->Workarounds.LowMaxVertexUniformCount) + return StdStrBuf("#define OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS\n") + buf; + else + return buf; } // Note this only gets the code which inserts the slices specific for the pass @@ -469,7 +473,7 @@ namespace // Or, even better, we could upload them into a UBO, but Intel doesn't support them prior to Sandy Bridge. 
struct BoneTransform { - float m[4][4]; + float m[3][4]; }; std::vector<BoneTransform> bones; if (mesh_instance.GetBoneCount() == 0) @@ -478,8 +482,7 @@ namespace static const BoneTransform dummy_bone = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f + 0.0f, 0.0f, 1.0f, 0.0f }; bones.push_back(dummy_bone); } @@ -492,8 +495,7 @@ namespace BoneTransform cooked_bone = { bone(0, 0), bone(0, 1), bone(0, 2), bone(0, 3), bone(1, 0), bone(1, 1), bone(1, 2), bone(1, 3), - bone(2, 0), bone(2, 1), bone(2, 2), bone(2, 3), - 0, 0, 0, 1 + bone(2, 0), bone(2, 1), bone(2, 2), bone(2, 3) }; bones.push_back(cooked_bone); } @@ -580,8 +582,11 @@ namespace // Upload the current bone transformation matrixes (if there are any) if (!bones.empty()) - call.SetUniformMatrix4x4fv(C4SSU_Bones, bones.size(), &bones[0].m[0][0]); - + if (pGL->Workarounds.LowMaxVertexUniformCount) + glUniformMatrix3x4fv(shader->GetUniform(C4SSU_Bones), bones.size(), GL_FALSE, &bones[0].m[0][0]); + else + glUniformMatrix4x3fv(shader->GetUniform(C4SSU_Bones), bones.size(), GL_TRUE, &bones[0].m[0][0]); + // Bind the vertex data of the mesh #define VERTEX_OFFSET(field) reinterpret_cast(offsetof(StdMeshVertex, field)) glBindBuffer(GL_ARRAY_BUFFER, vbo); diff --git a/src/graphics/C4Shader.h b/src/graphics/C4Shader.h index 86e561e10..53e38cc44 100644 --- a/src/graphics/C4Shader.h +++ b/src/graphics/C4Shader.h @@ -196,6 +196,12 @@ public: if (pShader->HaveUniform(iUniform)) glUniformMatrix3x2fv(pShader->GetUniform(iUniform), iLength, GL_TRUE, pVals); } + + void SetUniformMatrix3x4fv(int iUniform, int iLength, const float *pVals) const { + if (pShader->HaveUniform(iUniform)) + glUniformMatrix4x3fv(pShader->GetUniform(iUniform), iLength, GL_TRUE, pVals); + } + void SetUniformMatrix4x4fv(int iUniform, int iLength, const float* pVals) const { if (pShader->HaveUniform(iUniform)) glUniformMatrix4fvARB(pShader->GetUniform(iUniform), iLength, GL_TRUE, pVals);