gl: Use fewer uniform components to upload bones (#1285)

The last row of the bone transformation matrix is always 0,0,0,1, so
there's no point in uploading it. Also reduce the max bone count to 80,
which means the uniform array will fit into the available space on 6000
and 7000 series GeForce GPUs.

If we're short on uniform components, don't transpose the transformation
matrix before sending it to the shader, and transpose it in the shader
itself instead, saving 4 components per bone.
stable-6.1
Nicolas Hake 2015-03-17 22:54:22 +01:00
parent 65cf1081ee
commit 61ffb0fb28
5 changed files with 54 additions and 12 deletions

View File

@ -15,9 +15,11 @@
// Default Vertex Shader for mesh-based objects.
// Input uniforms:
// Input uniforms, if OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS is NOT defined:
// bones: array of 4x3 bone transformation matrices.
// Input uniforms, if OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS is defined:
// bones: array of 3x4 transposed bone transformation matrices.
// Input vertex attributes:
// oc_BoneWeights0 and oc_BoneWeights1: vectors of bone influence weights.
@ -32,8 +34,15 @@
// inside the bone matrix array that contains the identity matrix, with a bone
// weight of 1.0.
#define MAX_BONE_COUNT 80
varying vec3 normalDir;
uniform mat4 bones[128];
#ifndef OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS
uniform mat4x3 bones[MAX_BONE_COUNT];
#else
uniform mat3x4 bones[MAX_BONE_COUNT];
#endif
// For more performance, this should be set by the engine, and this shader
// should be compiled three times: with BONE_COUNT set to 0, 4, and 8,
@ -48,10 +57,17 @@ attribute vec4 oc_BoneIndices1;
attribute vec4 oc_BoneWeights1;
#endif
vec4 merge_bone(vec4 vertex, vec4 original, mat4 bone, float weight)
#ifndef OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS
vec4 merge_bone(vec4 vertex, vec4 original, mat4x3 bone, float weight)
{
return (bone * original) * weight + vertex;
return (mat4(bone) * original) * weight + vertex;
}
#else
// Workaround variant: `bone` arrives as a 3x4 matrix and is transposed here,
// in the shader, before being widened to a mat4. The transformed `original`
// is scaled by `weight` and accumulated onto `vertex`.
vec4 merge_bone(vec4 vertex, vec4 original, mat3x4 bone, float weight)
{
	mat4 boneTransform = mat4(transpose(bone));
	vec4 transformed = boneTransform * original;
	return transformed * weight + vertex;
}
#endif
slice(position)
{

View File

@ -260,6 +260,15 @@ CStdGLCtx *CStdGL::CreateContext(C4Window * pWindow, C4AbstractApp *pApp)
LogSilentF("GLExt: %s", gl_extensions ? gl_extensions : "");
}
}
// Check which workarounds we have to apply
{
	// If we have fewer than 2048 vertex uniform components available, we
	// need to upload bone matrices in a more compact layout.
	// Start from 0: glGetIntegerv leaves its output untouched when it
	// raises an error, so a failed query now enables the workaround
	// instead of comparing an indeterminate value.
	GLint count = 0;
	glGetIntegerv(GL_MAX_VERTEX_UNIFORM_COMPONENTS, &count);
	Workarounds.LowMaxVertexUniformCount = count < 2048;
}
}
if (!success)
{
@ -820,6 +829,7 @@ void CStdGL::Default()
iPixelFormat=0;
sfcFmt=0;
iClrDpt=0;
Workarounds.LowMaxVertexUniformCount = false;
}
#endif // USE_CONSOLE

View File

@ -184,6 +184,11 @@ public:
C4Shader* GetSpriteShader(int ssc);
C4Shader* GetSpriteShader(bool haveBase, bool haveOverlay, bool haveNormal);
// Flags for driver/hardware workarounds. CreateContext sets them after
// querying the GL implementation; Default() resets them to false.
struct
{
// True when GL_MAX_VERTEX_UNIFORM_COMPONENTS reports fewer than 2048
// components. In that case the shader source is prefixed with
// "#define OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS" and bone matrices are
// uploaded with glUniformMatrix3x4fv (untransposed) rather than
// glUniformMatrix4x3fv (transposed).
bool LowMaxVertexUniformCount;
} Workarounds;
protected:
bool CreatePrimarySurfaces(unsigned int iXRes, unsigned int iYRes, int iColorDepth, unsigned int iMonitor);

View File

@ -127,7 +127,11 @@ namespace
"}\n"
);
}
return buf;
if (pGL->Workarounds.LowMaxVertexUniformCount)
return StdStrBuf("#define OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS\n") + buf;
else
return buf;
}
// Note this only gets the code which inserts the slices specific for the pass
@ -469,7 +473,7 @@ namespace
// Or, even better, we could upload them into a UBO, but Intel doesn't support them prior to Sandy Bridge.
struct BoneTransform
{
float m[4][4];
float m[3][4];
};
std::vector<BoneTransform> bones;
if (mesh_instance.GetBoneCount() == 0)
@ -478,8 +482,7 @@ namespace
static const BoneTransform dummy_bone = {
1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f
0.0f, 0.0f, 1.0f, 0.0f
};
bones.push_back(dummy_bone);
}
@ -492,8 +495,7 @@ namespace
BoneTransform cooked_bone = {
bone(0, 0), bone(0, 1), bone(0, 2), bone(0, 3),
bone(1, 0), bone(1, 1), bone(1, 2), bone(1, 3),
bone(2, 0), bone(2, 1), bone(2, 2), bone(2, 3),
0, 0, 0, 1
bone(2, 0), bone(2, 1), bone(2, 2), bone(2, 3)
};
bones.push_back(cooked_bone);
}
@ -580,8 +582,11 @@ namespace
// Upload the current bone transformation matrices (if there are any)
if (!bones.empty())
call.SetUniformMatrix4x4fv(C4SSU_Bones, bones.size(), &bones[0].m[0][0]);
if (pGL->Workarounds.LowMaxVertexUniformCount)
glUniformMatrix3x4fv(shader->GetUniform(C4SSU_Bones), bones.size(), GL_FALSE, &bones[0].m[0][0]);
else
glUniformMatrix4x3fv(shader->GetUniform(C4SSU_Bones), bones.size(), GL_TRUE, &bones[0].m[0][0]);
// Bind the vertex data of the mesh
#define VERTEX_OFFSET(field) reinterpret_cast<const uint8_t *>(offsetof(StdMeshVertex, field))
glBindBuffer(GL_ARRAY_BUFFER, vbo);

View File

@ -196,6 +196,12 @@ public:
if (pShader->HaveUniform(iUniform))
glUniformMatrix3x2fv(pShader->GetUniform(iUniform), iLength, GL_TRUE, pVals);
}
// Uploads iLength matrices from pVals to the uniform identified by iUniform.
// Does nothing when the shader does not have that uniform.
// NOTE(review): this calls the 4x3 GL entry point with transpose enabled;
// presumably pVals holds row-major matrices of 3 rows by 4 columns - confirm.
void SetUniformMatrix3x4fv(int iUniform, int iLength, const float *pVals) const {
	if (!pShader->HaveUniform(iUniform))
		return;
	glUniformMatrix4x3fv(pShader->GetUniform(iUniform), iLength, GL_TRUE, pVals);
}
void SetUniformMatrix4x4fv(int iUniform, int iLength, const float* pVals) const {
if (pShader->HaveUniform(iUniform))
glUniformMatrix4fvARB(pShader->GetUniform(iUniform), iLength, GL_TRUE, pVals);