GL: Disable hardware-based skinning on old Intel devices

Our hardware-based skinning doesn't work on certain Intel graphics
devices. Fall back to software skinning on GPUs that return an OpenGL
3.1 context even though we explicitly request a 3.2 one.

Might fix #1459, #1689.
Nicolas Hake 2016-06-19 13:45:12 +02:00
parent 9555bf737f
commit e9c5facf91
4 changed files with 170 additions and 45 deletions

View File

@ -56,7 +56,11 @@ uniform mat3x4 bones[MAX_BONE_COUNT];
// For more performance, this should be set by the engine, and this shader
// should be compiled three times: with BONE_COUNT set to 0, 4, and 8,
// respectively. (Or we could split it even further.)
#define BONE_COUNT 8
#define BONE_COUNT 0
in vec4 oc_BoneIndices0;
in vec4 oc_BoneWeights0;

View File

@ -316,6 +316,18 @@ CStdGLCtx *CStdGL::CreateContext(C4Window * pWindow, C4AbstractApp *pApp)
const char *gl_renderer = reinterpret_cast<const char *>(glGetString(GL_RENDERER));
const char *gl_version = reinterpret_cast<const char *>(glGetString(GL_VERSION));
LogF("GL %s on %s (%s)", gl_version ? gl_version : "", gl_renderer ? gl_renderer : "", gl_vendor ? gl_vendor : "");
// Our shader-based skinning doesn't work on some Intel devices.
// Those devices return an OpenGL 3.1 context even though we
// request a 3.2 one; in this case, do CPU-based skinning instead.
assert(gl_version != NULL);
int major, minor;
sscanf(gl_version, "%d.%d", &major, &minor);
if (major < 3 || (major == 3 && minor < 2))
Workarounds.ForceSoftwareTransform = true;
if (Config.Graphics.DebugOpenGL)
// Dump extension list
@ -909,6 +921,7 @@ void CStdGL::Default()
Workarounds.LowMaxVertexUniformCount = false;
Workarounds.ForceSoftwareTransform = false;
unsigned int CStdGL::GenVAOID()

View File

@ -257,6 +257,7 @@ public:
bool LowMaxVertexUniformCount;
bool ForceSoftwareTransform;
} Workarounds;
void ObjectLabel(uint32_t identifier, uint32_t name, int32_t length, const char * label);

View File

@ -193,6 +193,9 @@ namespace
if (pGL->Workarounds.ForceSoftwareTransform)
buf = StdCopyStrBuf("#define OC_WA_FORCE_SOFTWARE_TRANSFORM\n") + buf;
if (LowMaxVertexUniformCount)
return StdStrBuf("#define OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS\n") + buf;
@ -613,29 +616,16 @@ namespace
return matrix;
void RenderSubMeshImpl(const StdProjectionMatrix& projectionMatrix, const StdMeshMatrix& modelviewMatrix, const StdMeshInstance& mesh_instance, const StdSubMeshInstance& instance, DWORD dwModClr, DWORD dwBlitMode, DWORD dwPlayerColor, const C4FoWRegion* pFoW, const C4Rect& clipRect, const C4Rect& outRect, bool parity)
// Don't render with degenerate matrix
if (fabs(modelviewMatrix.Determinant()) < 1e-6)
const StdMeshMaterial& material = instance.GetMaterial();
assert(material.BestTechniqueIndex != -1);
const StdMeshMaterialTechnique& technique = material.Techniques[material.BestTechniqueIndex];
bool using_shared_vertices = instance.GetSubMesh().GetVertices().empty();
GLuint vbo = mesh_instance.GetMesh().GetVBO();
GLuint ibo = mesh_instance.GetIBO();
unsigned int vaoid = mesh_instance.GetVAOID();
size_t vertex_buffer_offset = using_shared_vertices ? 0 : instance.GetSubMesh().GetOffsetInVBO();
size_t index_buffer_offset = instance.GetSubMesh().GetOffsetInIBO(); // note this is constant
// Cook the bone transform matrixes into something that OpenGL can use. This could be moved into RenderMeshImpl.
// Or, even better, we could upload them into a UBO, but Intel doesn't support them prior to Sandy Bridge.
// One bone transformation matrix in a layout that can be handed to OpenGL
// directly: three rows of four floats, stored contiguously. The array of
// these is uploaded starting at &bones[0].m[0][0].
struct BoneTransform
{
	float m[3][4];
};
std::vector<BoneTransform> CookBoneTransforms(const StdMeshInstance& mesh_instance)
// Cook the bone transform matrixes into something that OpenGL can use. This could be moved into RenderMeshImpl.
// Or, even better, we could upload them into a UBO, but Intel doesn't support them prior to Sandy Bridge.
std::vector<BoneTransform> bones;
if (mesh_instance.GetBoneCount() == 0)
@ -661,7 +651,101 @@ namespace
return bones;
// Vertex layout used for CPU-side (software) skinning: only the normal and
// the position are stored, since those are the attributes that get
// transformed by the bones.
// The member order (normal first, then position) must not change: the
// vertex attribute pointers are computed with offsetof() on this struct and
// the skinning code aggregate-initializes it with six zeros.
struct PretransformedMeshVertex
{
	float nx, ny, nz; // transformed normal
	float x, y, z;    // transformed position
};
// Computes the software-skinned normal and position of a single vertex.
//   out           - receives the resulting normal and position
//   in            - the original (untransformed) mesh vertex
//   mesh_instance - provides the current bone transformation for each of
//                   the bone indices referenced by the vertex
// A vertex without any bone assignment is copied through unchanged;
// otherwise the result is the weighted sum of the vertex transformed by
// each assigned bone.
void PretransformMeshVertex(PretransformedMeshVertex *out, const StdMeshVertex &in, const StdMeshInstance &mesh_instance)
{
	// If the first bone assignment has a weight of 0, all others are zero
	// as well, or the loader would have overwritten the assignment
	if (in.bone_weight[0] == 0.0f)
	{
		out->x = in.x;
		out->y = in.y;
		out->z = in.z;
		out->nx = in.nx;
		out->ny = in.ny;
		out->nz = in.nz;
	}
	else
	{
		// Accumulate the weighted contribution of every assigned bone.
		// Assignments are packed at the front, so the first non-positive
		// weight terminates the list.
		PretransformedMeshVertex vtx{ 0, 0, 0, 0, 0, 0 };
		for (int i = 0; i < StdMeshVertex::MaxBoneWeightCount && in.bone_weight[i] > 0; ++i)
		{
			float weight = in.bone_weight[i];
			const auto &bone = mesh_instance.GetBoneTransform(in.bone_index[i]);
			auto vertex = weight * (bone * in);
			vtx.nx += vertex.nx;
			vtx.ny += vertex.ny;
			vtx.nz += vertex.nz;
			vtx.x += vertex.x;
			vtx.y += vertex.y;
			vtx.z += vertex.z;
		}
		*out = vtx;
	}
}
void PretransformMeshVertices(const StdMeshInstance &mesh_instance, const StdSubMeshInstance& instance, GLuint vbo)
glBindBuffer(GL_ARRAY_BUFFER, vbo);
const auto &original_vertices = mesh_instance.GetSharedVertices().empty() ? instance.GetSubMesh().GetVertices() : mesh_instance.GetSharedVertices();
const size_t vertex_count = original_vertices.size();
// Unmapping the buffer may fail for certain reasons, in which case we need to try again.
glBufferData(GL_ARRAY_BUFFER, vertex_count * sizeof(PretransformedMeshVertex), NULL, GL_STREAM_DRAW);
void *map = glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
PretransformedMeshVertex *buffer = new (map) PretransformedMeshVertex[vertex_count];
for (size_t i = 0; i < vertex_count; ++i)
PretransformMeshVertex(&buffer[i], original_vertices[i], mesh_instance);
} while (glUnmapBuffer(GL_ARRAY_BUFFER) == GL_FALSE);
// Unbind the buffer so following rendering calls do not use it
glBindBuffer(GL_ARRAY_BUFFER, 0);
void RenderSubMeshImpl(const StdProjectionMatrix& projectionMatrix, const StdMeshMatrix& modelviewMatrix, const StdMeshInstance& mesh_instance, const StdSubMeshInstance& instance, DWORD dwModClr, DWORD dwBlitMode, DWORD dwPlayerColor, const C4FoWRegion* pFoW, const C4Rect& clipRect, const C4Rect& outRect, bool parity)
// Don't render with degenerate matrix
if (fabs(modelviewMatrix.Determinant()) < 1e-6)
const StdMeshMaterial& material = instance.GetMaterial();
assert(material.BestTechniqueIndex != -1);
const StdMeshMaterialTechnique& technique = material.Techniques[material.BestTechniqueIndex];
bool using_shared_vertices = instance.GetSubMesh().GetVertices().empty();
GLuint vbo = mesh_instance.GetMesh().GetVBO();
GLuint ibo = mesh_instance.GetIBO();
unsigned int vaoid = mesh_instance.GetVAOID();
size_t vertex_buffer_offset = using_shared_vertices ? 0 : instance.GetSubMesh().GetOffsetInVBO();
size_t index_buffer_offset = instance.GetSubMesh().GetOffsetInIBO(); // note this is constant
const bool ForceSoftwareTransform = pGL->Workarounds.ForceSoftwareTransform;
GLuint pretransform_vbo;
std::vector<BoneTransform> bones;
if (!ForceSoftwareTransform)
bones = CookBoneTransforms(mesh_instance);
glGenBuffers(1, &pretransform_vbo);
PretransformMeshVertices(mesh_instance, instance, pretransform_vbo);
// Modelview matrix does not change between passes, so cache it here
const StdMeshMatrix normalMatrixTranspose = StdMeshMatrix::Inverse(modelviewMatrix);
@ -750,6 +834,8 @@ namespace
call.SetUniform1f(C4SSU_MaterialShininess, pass.Shininess);
// Upload the current bone transformation matrixes (if there are any)
if (!ForceSoftwareTransform)
if (!bones.empty())
if (pGL->Workarounds.LowMaxVertexUniformCount)
@ -757,6 +843,7 @@ namespace
glUniformMatrix4x3fv(shader->GetUniform(C4SSU_Bones), bones.size(), GL_TRUE, &bones[0].m[0][0]);
GLuint vao;
const bool has_vao = pGL->GetVAO(vaoid, vao);
@ -773,25 +860,42 @@ namespace
glBindBuffer(GL_ARRAY_BUFFER, vbo);
#define VERTEX_OFFSET(field) reinterpret_cast<const uint8_t *>(offsetof(StdMeshVertex, field))
glVertexAttribPointer(shader->GetAttribute(C4SSA_Position), 3, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(x));
glVertexAttribPointer(shader->GetAttribute(C4SSA_Normal), 3, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(nx));
if (shader->GetAttribute(C4SSA_TexCoord) != -1)
glVertexAttribPointer(shader->GetAttribute(C4SSA_TexCoord), 2, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(u));
if (!ForceSoftwareTransform)
glVertexAttribPointer(shader->GetAttribute(C4SSA_Position), 3, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(x));
glVertexAttribPointer(shader->GetAttribute(C4SSA_Normal), 3, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(nx));
glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneWeights0), 4, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_weight));
glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneWeights1), 4, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_weight) + 4 * sizeof(std::remove_all_extents<decltype(StdMeshVertex::bone_weight)>::type));
glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneIndices0), 4, GL_SHORT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_index));
glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneIndices1), 4, GL_SHORT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_index) + 4 * sizeof(std::remove_all_extents<decltype(StdMeshVertex::bone_index)>::type));
if (shader->GetAttribute(C4SSA_TexCoord) != -1)
if (shader->GetAttribute(C4SSA_TexCoord) != -1)
if (ForceSoftwareTransform)
glBindBuffer(GL_ARRAY_BUFFER, pretransform_vbo);
#define VERTEX_OFFSET(field) reinterpret_cast<const uint8_t *>(offsetof(PretransformedMeshVertex, field))
glVertexAttribPointer(shader->GetAttribute(C4SSA_Position), 3, GL_FLOAT, GL_FALSE, sizeof(PretransformedMeshVertex), VERTEX_OFFSET(x));
glVertexAttribPointer(shader->GetAttribute(C4SSA_Normal), 3, GL_FLOAT, GL_FALSE, sizeof(PretransformedMeshVertex), VERTEX_OFFSET(nx));
glBindBuffer(GL_ARRAY_BUFFER, 0);
// Bind textures
for (unsigned int j = 0; j < pass.TextureUnits.size(); ++j)
@ -860,6 +964,9 @@ namespace
if (ForceSoftwareTransform)
glDeleteBuffers(1, &pretransform_vbo);
void RenderMeshImpl(const StdProjectionMatrix& projectionMatrix, const StdMeshMatrix& modelviewMatrix, StdMeshInstance& instance, DWORD dwModClr, DWORD dwBlitMode, DWORD dwPlayerColor, const C4FoWRegion* pFoW, const C4Rect& clipRect, const C4Rect& outRect, bool parity); // Needed by RenderAttachedMesh