From e9c5facf91ea201e909ab72b1ae845bbfb7201c7 Mon Sep 17 00:00:00 2001 From: Nicolas Hake Date: Sun, 19 Jun 2016 13:45:12 +0200 Subject: [PATCH] GL: Disable hardware-based skinning on old Intel devices Our hardware-based skinning doesn't work on certain Intel graphics devices. Fall back to software skinning on GPUs that return an OpenGL 3.1 context even though we explicitly request a 3.2 one. Might fix #1459, #1689. --- planet/Graphics.ocg/MeshVertexShader.glsl | 4 + src/graphics/C4DrawGL.cpp | 13 ++ src/graphics/C4DrawGL.h | 1 + src/graphics/C4DrawMeshGL.cpp | 197 +++++++++++++++++----- 4 files changed, 170 insertions(+), 45 deletions(-) diff --git a/planet/Graphics.ocg/MeshVertexShader.glsl b/planet/Graphics.ocg/MeshVertexShader.glsl index f357e6b47..fdb823115 100644 --- a/planet/Graphics.ocg/MeshVertexShader.glsl +++ b/planet/Graphics.ocg/MeshVertexShader.glsl @@ -56,7 +56,11 @@ uniform mat3x4 bones[MAX_BONE_COUNT]; // For more performance, this should be set by the engine, and this shader // should be compiled three times: with BONE_COUNT set to 0, 4, and 8, // respectively. (Or we could split it even further.) +#ifndef OC_WA_FORCE_SOFTWARE_TRANSFORM #define BONE_COUNT 8 +#else +#define BONE_COUNT 0 +#endif in vec4 oc_BoneIndices0; in vec4 oc_BoneWeights0; diff --git a/src/graphics/C4DrawGL.cpp b/src/graphics/C4DrawGL.cpp index 163b0ed0f..43f631bae 100644 --- a/src/graphics/C4DrawGL.cpp +++ b/src/graphics/C4DrawGL.cpp @@ -316,6 +316,18 @@ CStdGLCtx *CStdGL::CreateContext(C4Window * pWindow, C4AbstractApp *pApp) const char *gl_renderer = reinterpret_cast(glGetString(GL_RENDERER)); const char *gl_version = reinterpret_cast(glGetString(GL_VERSION)); LogF("GL %s on %s (%s)", gl_version ? gl_version : "", gl_renderer ? gl_renderer : "", gl_vendor ? gl_vendor : ""); + + // Our shader-based skinning doesn't work on some Intel devices. + // Those devices return an OpenGL 3.1 context even though we + // request a 3.2 one; in this case, do CPU-based skinning instead. + { + assert(gl_version != NULL); + int major, minor; + sscanf(gl_version, "%d.%d", &major, &minor); + if (major < 3 || (major == 3 && minor < 2)) + Workarounds.ForceSoftwareTransform = true; + } + if (Config.Graphics.DebugOpenGL) { // Dump extension list @@ -909,6 +921,7 @@ void CStdGL::Default() iPixelFormat=0; sfcFmt=0; Workarounds.LowMaxVertexUniformCount = false; + Workarounds.ForceSoftwareTransform = false; } unsigned int CStdGL::GenVAOID() diff --git a/src/graphics/C4DrawGL.h b/src/graphics/C4DrawGL.h index 2fa05007a..5e4ac026c 100644 --- a/src/graphics/C4DrawGL.h +++ b/src/graphics/C4DrawGL.h @@ -257,6 +257,7 @@ public: struct { bool LowMaxVertexUniformCount; + bool ForceSoftwareTransform; } Workarounds; void ObjectLabel(uint32_t identifier, uint32_t name, int32_t length, const char * label); diff --git a/src/graphics/C4DrawMeshGL.cpp b/src/graphics/C4DrawMeshGL.cpp index 475443401..1adad7718 100644 --- a/src/graphics/C4DrawMeshGL.cpp +++ b/src/graphics/C4DrawMeshGL.cpp @@ -193,6 +193,9 @@ namespace ); } + if (pGL->Workarounds.ForceSoftwareTransform) + buf = StdCopyStrBuf("#define OC_WA_FORCE_SOFTWARE_TRANSFORM\n") + buf; + if (LowMaxVertexUniformCount) return StdStrBuf("#define OC_WA_LOW_MAX_VERTEX_UNIFORM_COMPONENTS\n") + buf; else @@ -613,29 +616,16 @@ namespace return matrix; } - void RenderSubMeshImpl(const StdProjectionMatrix& projectionMatrix, const StdMeshMatrix& modelviewMatrix, const StdMeshInstance& mesh_instance, const StdSubMeshInstance& instance, DWORD dwModClr, DWORD dwBlitMode, DWORD dwPlayerColor, const C4FoWRegion* pFoW, const C4Rect& clipRect, const C4Rect& outRect, bool parity) + struct BoneTransform { - // Don't render with degenerate matrix - if (fabs(modelviewMatrix.Determinant()) < 1e-6) - return; - - const StdMeshMaterial& material = instance.GetMaterial(); - assert(material.BestTechniqueIndex != -1); - const StdMeshMaterialTechnique& technique = material.Techniques[material.BestTechniqueIndex]; - - bool using_shared_vertices = instance.GetSubMesh().GetVertices().empty(); - GLuint vbo = mesh_instance.GetMesh().GetVBO(); - GLuint ibo = mesh_instance.GetIBO(); - unsigned int vaoid = mesh_instance.GetVAOID(); - size_t vertex_buffer_offset = using_shared_vertices ? 0 : instance.GetSubMesh().GetOffsetInVBO(); - size_t index_buffer_offset = instance.GetSubMesh().GetOffsetInIBO(); // note this is constant + float m[3][4]; + }; + std::vector CookBoneTransforms(const StdMeshInstance& mesh_instance) + { // Cook the bone transform matrixes into something that OpenGL can use. This could be moved into RenderMeshImpl. // Or, even better, we could upload them into a UBO, but Intel doesn't support them prior to Sandy Bridge. - struct BoneTransform - { - float m[3][4]; - }; + std::vector bones; if (mesh_instance.GetBoneCount() == 0) { @@ -661,7 +651,101 @@ namespace bones.push_back(cooked_bone); } } + return bones; + } + struct PretransformedMeshVertex + { + float nx, ny, nz; + float x, y, z; + }; + + void PretransformMeshVertex(PretransformedMeshVertex *out, const StdMeshVertex &in, const StdMeshInstance &mesh_instance) + { + // If the first bone assignment has a weight of 0, all others are zero + // as well, or the loader would have overwritten the assignment + if (in.bone_weight[0] == 0.0f) + { + out->x = in.x; + out->y = in.y; + out->z = in.z; + out->nx = in.nx; + out->ny = in.ny; + out->nz = in.nz; + } + else + { + PretransformedMeshVertex vtx{ 0, 0, 0, 0, 0, 0 }; + for (int i = 0; i < StdMeshVertex::MaxBoneWeightCount && in.bone_weight[i] > 0; ++i) + { + float weight = in.bone_weight[i]; + const auto &bone = mesh_instance.GetBoneTransform(in.bone_index[i]); + auto vertex = weight * (bone * in); + vtx.nx += vertex.nx; + vtx.ny += vertex.ny; + vtx.nz += vertex.nz; + vtx.x += vertex.x; + vtx.y += vertex.y; + vtx.z += vertex.z; + } + *out = vtx; + } + } + + void PretransformMeshVertices(const StdMeshInstance &mesh_instance, const StdSubMeshInstance& instance, GLuint vbo) + { + assert(pGL->Workarounds.ForceSoftwareTransform); + glBindBuffer(GL_ARRAY_BUFFER, vbo); + + const auto &original_vertices = mesh_instance.GetSharedVertices().empty() ? instance.GetSubMesh().GetVertices() : mesh_instance.GetSharedVertices(); + const size_t vertex_count = original_vertices.size(); + + // Unmapping the buffer may fail for certain reasons, in which case we need to try again. + do + { + glBufferData(GL_ARRAY_BUFFER, vertex_count * sizeof(PretransformedMeshVertex), NULL, GL_STREAM_DRAW); + void *map = glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); + PretransformedMeshVertex *buffer = new (map) PretransformedMeshVertex[vertex_count]; + + for (size_t i = 0; i < vertex_count; ++i) + { + PretransformMeshVertex(&buffer[i], original_vertices[i], mesh_instance); + } + } while (glUnmapBuffer(GL_ARRAY_BUFFER) == GL_FALSE); + // Unbind the buffer so following rendering calls do not use it + glBindBuffer(GL_ARRAY_BUFFER, 0); + } + + void RenderSubMeshImpl(const StdProjectionMatrix& projectionMatrix, const StdMeshMatrix& modelviewMatrix, const StdMeshInstance& mesh_instance, const StdSubMeshInstance& instance, DWORD dwModClr, DWORD dwBlitMode, DWORD dwPlayerColor, const C4FoWRegion* pFoW, const C4Rect& clipRect, const C4Rect& outRect, bool parity) + { + // Don't render with degenerate matrix + if (fabs(modelviewMatrix.Determinant()) < 1e-6) + return; + + const StdMeshMaterial& material = instance.GetMaterial(); + assert(material.BestTechniqueIndex != -1); + const StdMeshMaterialTechnique& technique = material.Techniques[material.BestTechniqueIndex]; + + bool using_shared_vertices = instance.GetSubMesh().GetVertices().empty(); + GLuint vbo = mesh_instance.GetMesh().GetVBO(); + GLuint ibo = mesh_instance.GetIBO(); + unsigned int vaoid = mesh_instance.GetVAOID(); + size_t vertex_buffer_offset = using_shared_vertices ? 0 : instance.GetSubMesh().GetOffsetInVBO(); + size_t index_buffer_offset = instance.GetSubMesh().GetOffsetInIBO(); // note this is constant + + const bool ForceSoftwareTransform = pGL->Workarounds.ForceSoftwareTransform; + GLuint pretransform_vbo; + + std::vector bones; + if (!ForceSoftwareTransform) + { + bones = CookBoneTransforms(mesh_instance); + } + else + { + glGenBuffers(1, &pretransform_vbo); + PretransformMeshVertices(mesh_instance, instance, pretransform_vbo); + } // Modelview matrix does not change between passes, so cache it here const StdMeshMatrix normalMatrixTranspose = StdMeshMatrix::Inverse(modelviewMatrix); @@ -670,18 +754,18 @@ namespace { const StdMeshMaterialPass& pass = technique.Passes[i]; - if(!pass.DepthCheck) + if (!pass.DepthCheck) glDisable(GL_DEPTH_TEST); glDepthMask(pass.DepthWrite ? GL_TRUE : GL_FALSE); - if(pass.AlphaToCoverage) + if (pass.AlphaToCoverage) glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE); else glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE); glFrontFace(parity ? GL_CW : GL_CCW); - if(mesh_instance.GetCompletion() < 1.0f) + if (mesh_instance.GetCompletion() < 1.0f) { // Backfaces might be visible when completion is < 1.0f since front // faces might be omitted. @@ -709,17 +793,17 @@ namespace // is <255. This makes sure that normal non-blended meshes can have // blending disabled in their material script (which disables expensive // face ordering) but when they are made translucent via clrmod - if(!(dwBlitMode & C4GFXBLIT_ADDITIVE)) + if (!(dwBlitMode & C4GFXBLIT_ADDITIVE)) { - if( ((dwModClr >> 24) & 0xff) < 0xff) // && (!(dwBlitMode & C4GFXBLIT_MOD2)) ) + if (((dwModClr >> 24) & 0xff) < 0xff) // && (!(dwBlitMode & C4GFXBLIT_MOD2)) ) glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); else glBlendFunc(OgreBlendTypeToGL(pass.SceneBlendFactors[0]), - OgreBlendTypeToGL(pass.SceneBlendFactors[1])); + OgreBlendTypeToGL(pass.SceneBlendFactors[1])); } else { - if( ((dwModClr >> 24) & 0xff) < 0xff) // && (!(dwBlitMode & C4GFXBLIT_MOD2)) ) + if (((dwModClr >> 24) & 0xff) < 0xff) // && (!(dwBlitMode & C4GFXBLIT_MOD2)) ) glBlendFunc(GL_SRC_ALPHA, GL_ONE); else glBlendFunc(OgreBlendTypeToGL(pass.SceneBlendFactors[0]), GL_ONE); @@ -729,8 +813,8 @@ namespace // Upload all parameters to the shader int ssc = 0; - if(dwBlitMode & C4GFXBLIT_MOD2) ssc |= C4SSC_MOD2; - if(pFoW != NULL) ssc |= C4SSC_LIGHT; + if (dwBlitMode & C4GFXBLIT_MOD2) ssc |= C4SSC_MOD2; + if (pFoW != NULL) ssc |= C4SSC_LIGHT; const C4Shader* shader = pass.Program->Program->GetShader(ssc); if (!shader) return; C4ShaderCall call(shader); @@ -750,12 +834,15 @@ namespace call.SetUniform1f(C4SSU_MaterialShininess, pass.Shininess); // Upload the current bone transformation matrixes (if there are any) - if (!bones.empty()) + if (!ForceSoftwareTransform) { - if (pGL->Workarounds.LowMaxVertexUniformCount) - glUniformMatrix3x4fv(shader->GetUniform(C4SSU_Bones), bones.size(), GL_FALSE, &bones[0].m[0][0]); - else - glUniformMatrix4x3fv(shader->GetUniform(C4SSU_Bones), bones.size(), GL_TRUE, &bones[0].m[0][0]); + if (!bones.empty()) + { + if (pGL->Workarounds.LowMaxVertexUniformCount) + glUniformMatrix3x4fv(shader->GetUniform(C4SSU_Bones), bones.size(), GL_FALSE, &bones[0].m[0][0]); + else + glUniformMatrix4x3fv(shader->GetUniform(C4SSU_Bones), bones.size(), GL_TRUE, &bones[0].m[0][0]); + } } GLuint vao; @@ -773,25 +860,42 @@ namespace glBindBuffer(GL_ARRAY_BUFFER, vbo); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ibo); #define VERTEX_OFFSET(field) reinterpret_cast(offsetof(StdMeshVertex, field)) - glVertexAttribPointer(shader->GetAttribute(C4SSA_Position), 3, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(x)); - glVertexAttribPointer(shader->GetAttribute(C4SSA_Normal), 3, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(nx)); if (shader->GetAttribute(C4SSA_TexCoord) != -1) glVertexAttribPointer(shader->GetAttribute(C4SSA_TexCoord), 2, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(u)); - glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneWeights0), 4, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_weight)); - glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneWeights1), 4, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_weight) + 4 * sizeof(std::remove_all_extents::type)); - glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneIndices0), 4, GL_SHORT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_index)); - glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneIndices1), 4, GL_SHORT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_index) + 4 * sizeof(std::remove_all_extents::type)); - glEnableVertexAttribArray(shader->GetAttribute(C4SSA_Position)); - glEnableVertexAttribArray(shader->GetAttribute(C4SSA_Normal)); + if (!ForceSoftwareTransform) + { + glVertexAttribPointer(shader->GetAttribute(C4SSA_Position), 3, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(x)); + glVertexAttribPointer(shader->GetAttribute(C4SSA_Normal), 3, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(nx)); + glEnableVertexAttribArray(shader->GetAttribute(C4SSA_Position)); + glEnableVertexAttribArray(shader->GetAttribute(C4SSA_Normal)); + + glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneWeights0), 4, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_weight)); + glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneWeights1), 4, GL_FLOAT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_weight) + 4 * sizeof(std::remove_all_extents::type)); + glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneIndices0), 4, GL_SHORT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_index)); + glVertexAttribPointer(shader->GetAttribute(C4SSA_BoneIndices1), 4, GL_SHORT, GL_FALSE, sizeof(StdMeshVertex), VERTEX_OFFSET(bone_index) + 4 * sizeof(std::remove_all_extents::type)); + glEnableVertexAttribArray(shader->GetAttribute(C4SSA_BoneWeights0)); + glEnableVertexAttribArray(shader->GetAttribute(C4SSA_BoneWeights1)); + glEnableVertexAttribArray(shader->GetAttribute(C4SSA_BoneIndices0)); + glEnableVertexAttribArray(shader->GetAttribute(C4SSA_BoneIndices1)); + } if (shader->GetAttribute(C4SSA_TexCoord) != -1) glEnableVertexAttribArray(shader->GetAttribute(C4SSA_TexCoord)); - glEnableVertexAttribArray(shader->GetAttribute(C4SSA_BoneWeights0)); - glEnableVertexAttribArray(shader->GetAttribute(C4SSA_BoneWeights1)); - glEnableVertexAttribArray(shader->GetAttribute(C4SSA_BoneIndices0)); - glEnableVertexAttribArray(shader->GetAttribute(C4SSA_BoneIndices1)); + #undef VERTEX_OFFSET } + if (ForceSoftwareTransform) + { + glBindBuffer(GL_ARRAY_BUFFER, pretransform_vbo); +#define VERTEX_OFFSET(field) reinterpret_cast(offsetof(PretransformedMeshVertex, field)) + glVertexAttribPointer(shader->GetAttribute(C4SSA_Position), 3, GL_FLOAT, GL_FALSE, sizeof(PretransformedMeshVertex), VERTEX_OFFSET(x)); + glEnableVertexAttribArray(shader->GetAttribute(C4SSA_Position)); + glVertexAttribPointer(shader->GetAttribute(C4SSA_Normal), 3, GL_FLOAT, GL_FALSE, sizeof(PretransformedMeshVertex), VERTEX_OFFSET(nx)); + glEnableVertexAttribArray(shader->GetAttribute(C4SSA_Normal)); +#undef VERTEX_OFFSET + glBindBuffer(GL_ARRAY_BUFFER, 0); + } + // Bind textures for (unsigned int j = 0; j < pass.TextureUnits.size(); ++j) { @@ -860,6 +964,9 @@ namespace if(!pass.DepthCheck) glEnable(GL_DEPTH_TEST); } + + if (ForceSoftwareTransform) + glDeleteBuffers(1, &pretransform_vbo); } void RenderMeshImpl(const StdProjectionMatrix& projectionMatrix, const StdMeshMatrix& modelviewMatrix, StdMeshInstance& instance, DWORD dwModClr, DWORD dwBlitMode, DWORD dwPlayerColor, const C4FoWRegion* pFoW, const C4Rect& clipRect, const C4Rect& outRect, bool parity); // Needed by RenderAttachedMesh