forked from Mirrors/openclonk
gl: Use fewer uniform components to upload bones (#1285)
The last row of the bone transformation matrix is always 0,0,0,1, so there's no point in uploading it. Also reduce the max bone count to 80, which means the uniform array will fit into the available space on 6000 and 7000 series GeForce GPUs. If we're short on uniform components, don't transpose the transformation matrix before sending it to the shader; transpose it in the shader itself instead, saving 4 components per bone.
stable-6.1
parent
65cf1081ee
commit
61ffb0fb28
|
@ -15,9 +15,11 @@
|
|||
|
||||
// Default Vertex Shader for mesh-based objects.
|
||||
|
||||
// Input uniforms:
|
||||
// Input uniforms, if OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS is NOT defined:
|
||||
// bones: array of 4x3 bone transformation matrices.
|
||||
|
||||
// Input uniforms, if OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS is defined:
|
||||
// bones: array of 3x4 transposed bone transformation matrices.
|
||||
|
||||
// Input vertex attributes:
|
||||
// oc_BoneWeights0 and oc_BoneWeights1: vectors of bone influence weights.
|
||||
|
@ -32,8 +34,15 @@
|
|||
// inside the bone matrix array that contains the identity matrix, with a bone
|
||||
// weight of 1.0.
|
||||
|
||||
#define MAX_BONE_COUNT 80
|
||||
|
||||
varying vec3 normalDir;
|
||||
uniform mat4 bones[128];
|
||||
|
||||
#ifndef OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS
|
||||
uniform mat4x3 bones[MAX_BONE_COUNT];
|
||||
#else
|
||||
uniform mat3x4 bones[MAX_BONE_COUNT];
|
||||
#endif
|
||||
|
||||
// For more performance, this should be set by the engine, and this shader
|
||||
// should be compiled three times: with BONE_COUNT set to 0, 4, and 8,
|
||||
|
@ -48,10 +57,17 @@ attribute vec4 oc_BoneIndices1;
|
|||
attribute vec4 oc_BoneWeights1;
|
||||
#endif
|
||||
|
||||
vec4 merge_bone(vec4 vertex, vec4 original, mat4 bone, float weight)
|
||||
#ifndef OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS
|
||||
vec4 merge_bone(vec4 vertex, vec4 original, mat4x3 bone, float weight)
|
||||
{
|
||||
return (bone * original) * weight + vertex;
|
||||
return (mat4(bone) * original) * weight + vertex;
|
||||
}
|
||||
#else
|
||||
vec4 merge_bone(vec4 vertex, vec4 original, mat3x4 bone, float weight)
|
||||
{
|
||||
return (mat4(transpose(bone)) * original) * weight + vertex;
|
||||
}
|
||||
#endif
|
||||
|
||||
slice(position)
|
||||
{
|
||||
|
|
|
@ -260,6 +260,15 @@ CStdGLCtx *CStdGL::CreateContext(C4Window * pWindow, C4AbstractApp *pApp)
|
|||
LogSilentF("GLExt: %s", gl_extensions ? gl_extensions : "");
|
||||
}
|
||||
}
|
||||
|
||||
// Check which workarounds we have to apply
|
||||
{
|
||||
// If we have less than 2048 uniform components available, we
|
||||
// need to upload bone matrices in a different way
|
||||
GLint count;
|
||||
glGetIntegerv(GL_MAX_VERTEX_UNIFORM_COMPONENTS, &count);
|
||||
Workarounds.LowMaxVertexUniformCount = count < 2048;
|
||||
}
|
||||
}
|
||||
if (!success)
|
||||
{
|
||||
|
@ -820,6 +829,7 @@ void CStdGL::Default()
|
|||
iPixelFormat=0;
|
||||
sfcFmt=0;
|
||||
iClrDpt=0;
|
||||
Workarounds.LowMaxVertexUniformCount = false;
|
||||
}
|
||||
|
||||
#endif // USE_CONSOLE
|
||||
|
|
|
@ -184,6 +184,11 @@ public:
|
|||
C4Shader* GetSpriteShader(int ssc);
|
||||
C4Shader* GetSpriteShader(bool haveBase, bool haveOverlay, bool haveNormal);
|
||||
|
||||
struct
|
||||
{
|
||||
bool LowMaxVertexUniformCount;
|
||||
} Workarounds;
|
||||
|
||||
protected:
|
||||
bool CreatePrimarySurfaces(unsigned int iXRes, unsigned int iYRes, int iColorDepth, unsigned int iMonitor);
|
||||
|
||||
|
|
|
@ -127,7 +127,11 @@ namespace
|
|||
"}\n"
|
||||
);
|
||||
}
|
||||
return buf;
|
||||
|
||||
if (pGL->Workarounds.LowMaxVertexUniformCount)
|
||||
return StdStrBuf("#define OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS\n") + buf;
|
||||
else
|
||||
return buf;
|
||||
}
|
||||
|
||||
// Note this only gets the code which inserts the slices specific for the pass
|
||||
|
@ -469,7 +473,7 @@ namespace
|
|||
// Or, even better, we could upload them into a UBO, but Intel doesn't support them prior to Sandy Bridge.
|
||||
struct BoneTransform
|
||||
{
|
||||
float m[4][4];
|
||||
float m[3][4];
|
||||
};
|
||||
std::vector<BoneTransform> bones;
|
||||
if (mesh_instance.GetBoneCount() == 0)
|
||||
|
@ -478,8 +482,7 @@ namespace
|
|||
static const BoneTransform dummy_bone = {
|
||||
1.0f, 0.0f, 0.0f, 0.0f,
|
||||
0.0f, 1.0f, 0.0f, 0.0f,
|
||||
0.0f, 0.0f, 1.0f, 0.0f,
|
||||
0.0f, 0.0f, 0.0f, 1.0f
|
||||
0.0f, 0.0f, 1.0f, 0.0f
|
||||
};
|
||||
bones.push_back(dummy_bone);
|
||||
}
|
||||
|
@ -492,8 +495,7 @@ namespace
|
|||
BoneTransform cooked_bone = {
|
||||
bone(0, 0), bone(0, 1), bone(0, 2), bone(0, 3),
|
||||
bone(1, 0), bone(1, 1), bone(1, 2), bone(1, 3),
|
||||
bone(2, 0), bone(2, 1), bone(2, 2), bone(2, 3),
|
||||
0, 0, 0, 1
|
||||
bone(2, 0), bone(2, 1), bone(2, 2), bone(2, 3)
|
||||
};
|
||||
bones.push_back(cooked_bone);
|
||||
}
|
||||
|
@ -580,8 +582,11 @@ namespace
|
|||
|
||||
// Upload the current bone transformation matrices (if there are any)
|
||||
if (!bones.empty())
|
||||
call.SetUniformMatrix4x4fv(C4SSU_Bones, bones.size(), &bones[0].m[0][0]);
|
||||
|
||||
if (pGL->Workarounds.LowMaxVertexUniformCount)
|
||||
glUniformMatrix3x4fv(shader->GetUniform(C4SSU_Bones), bones.size(), GL_FALSE, &bones[0].m[0][0]);
|
||||
else
|
||||
glUniformMatrix4x3fv(shader->GetUniform(C4SSU_Bones), bones.size(), GL_TRUE, &bones[0].m[0][0]);
|
||||
|
||||
// Bind the vertex data of the mesh
|
||||
#define VERTEX_OFFSET(field) reinterpret_cast<const uint8_t *>(offsetof(StdMeshVertex, field))
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
||||
|
|
|
@ -196,6 +196,12 @@ public:
|
|||
if (pShader->HaveUniform(iUniform))
|
||||
glUniformMatrix3x2fv(pShader->GetUniform(iUniform), iLength, GL_TRUE, pVals);
|
||||
}
|
||||
|
||||
void SetUniformMatrix3x4fv(int iUniform, int iLength, const float *pVals) const {
|
||||
if (pShader->HaveUniform(iUniform))
|
||||
glUniformMatrix4x3fv(pShader->GetUniform(iUniform), iLength, GL_TRUE, pVals);
|
||||
}
|
||||
|
||||
void SetUniformMatrix4x4fv(int iUniform, int iLength, const float* pVals) const {
|
||||
if (pShader->HaveUniform(iUniform))
|
||||
glUniformMatrix4fvARB(pShader->GetUniform(iUniform), iLength, GL_TRUE, pVals);
|
||||
|
|
Loading…
Reference in New Issue