forked from Mirrors/openclonk
Pre-compute Z values before face ordering (#984)
Pre-computed floating point numbers can be safely used in the comparison function, whereas recomputation every time the sort function is called might lead to a crash when the computed number is slightly different every time, because the sort function would return different results for the same faces.
stable-5.3
parent
fa68227e6e
commit
5dd2d45502
|
@ -22,10 +22,24 @@
|
||||||
#include <StdMesh.h>
|
#include <StdMesh.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
static int StdMeshFaceCmp(const StdMeshFace& face1, const StdMeshFace& face2);
|
namespace
|
||||||
|
{
|
||||||
|
struct StdMeshFaceOrderHelper
|
||||||
|
{
|
||||||
|
float z;
|
||||||
|
unsigned int i;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
static int StdMeshFaceCmp(const StdMeshFaceOrderHelper& h1, const StdMeshFaceOrderHelper& h2)
|
||||||
|
{
|
||||||
|
if(h1.z < h2.z) return -1;
|
||||||
|
else if(h1.z > h2.z) return +1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#define SORT_NAME StdMesh
|
#define SORT_NAME StdMesh
|
||||||
#define SORT_TYPE StdMeshFace
|
#define SORT_TYPE StdMeshFaceOrderHelper
|
||||||
#define SORT_CMP StdMeshFaceCmp
|
#define SORT_CMP StdMeshFaceCmp
|
||||||
#include "timsort/sort.h"
|
#include "timsort/sort.h"
|
||||||
|
|
||||||
|
@ -42,71 +56,80 @@ namespace
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Helper to sort faces for FaceOrdering
|
float StdMeshFaceOrderGetVertexZ(const StdMeshVertex& vtx, const StdMeshMatrix& trans)
|
||||||
struct StdMeshInstanceFaceOrderingCmpPred
|
|
||||||
{
|
{
|
||||||
const StdMeshVertex* m_vertices;
|
// TODO: Need to apply attach matrix in case of attached meshes
|
||||||
StdSubMeshInstance::FaceOrdering m_face_ordering;
|
|
||||||
const StdMeshMatrix& m_global_trans;
|
|
||||||
|
|
||||||
StdMeshInstanceFaceOrderingCmpPred(const StdMeshInstance& mesh_inst, const StdSubMeshInstance& sub_inst,
|
// We need to evaluate the Z coordinate of the transformed vertex
|
||||||
StdSubMeshInstance::FaceOrdering face_ordering, const StdMeshMatrix& global_trans):
|
// (for all three vertices of the two faces), something like
|
||||||
m_face_ordering(face_ordering), m_global_trans(global_trans)
|
// float z11 = (trans*m_vertices[face1.Vertices[0]]).z;
|
||||||
|
// However we don't do the full matrix multiplication as we are
|
||||||
|
// only interested in the Z coordinate of the result, also we are
|
||||||
|
// not interested in the resulting normals.
|
||||||
|
return trans(2,0)*vtx.x + trans(2,1)*vtx.y + trans(2,2)*vtx.z + trans(2,3);
|
||||||
|
}
|
||||||
|
|
||||||
|
float StdMeshFaceOrderGetFaceZ(const StdMeshVertex* vertices, const StdMeshFace& face, const StdMeshMatrix& trans)
|
||||||
|
{
|
||||||
|
const float z1 = StdMeshFaceOrderGetVertexZ(vertices[face.Vertices[0]], trans);
|
||||||
|
const float z2 = StdMeshFaceOrderGetVertexZ(vertices[face.Vertices[1]], trans);
|
||||||
|
const float z3 = StdMeshFaceOrderGetVertexZ(vertices[face.Vertices[2]], trans);
|
||||||
|
return std::max(std::max(z1, z2), z3);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SortFacesArray(const StdMeshVertex* vertices, std::vector<StdMeshFace>& faces, StdSubMeshInstance::FaceOrdering face_ordering, const StdMeshMatrix& trans)
|
||||||
|
{
|
||||||
|
if(faces.empty()) return;
|
||||||
|
|
||||||
|
std::vector<StdMeshFaceOrderHelper> helpers(faces.size());
|
||||||
|
for(unsigned int i = 0; i < faces.size(); ++i)
|
||||||
{
|
{
|
||||||
if(sub_inst.GetNumVertices() > 0)
|
helpers[i].i = i;
|
||||||
m_vertices = &sub_inst.GetVertices()[0];
|
helpers[i].z = StdMeshFaceOrderGetFaceZ(vertices, faces[i], trans);
|
||||||
else
|
|
||||||
m_vertices = &mesh_inst.GetSharedVertices()[0];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline float get_z(const StdMeshVertex& vtx) const
|
// We use timsort here instead of std::sort for performance
|
||||||
|
// reasons. This is performance critical code, with this function being
|
||||||
|
// called at least once per frame for each semi-transparent object. I have
|
||||||
|
// measured a factor 7 difference between the two sorting algorithms on my
|
||||||
|
// system.
|
||||||
|
|
||||||
|
// We also pre-compute the Z values that we use for sorting, and sort the
|
||||||
|
// array of Z values, then use the resorted indices to sort the original
|
||||||
|
// faces array. The reason for this is twofold:
|
||||||
|
// 1. We don't need to compute the Z value every time the comparison function
|
||||||
|
// is called. Even though the computation is not very expensive, we have
|
||||||
|
// to do many comparisons, and small things add up. I have measured a
|
||||||
|
// 5-10% performance benefit.
|
||||||
|
// 2. More importantly, due to floating point rounding errors we cannot guarantee
|
||||||
|
// that Z values computed in the sorting function always yield the exact same
|
||||||
|
// number, and the same sorting result for the same faces. This can lead to
|
||||||
|
// a crash, because the f(a1, a2) = -f(a2, a1) property for the sorting function
|
||||||
|
// would no longer be met, resulting in undefined behaviour in the sort call.
|
||||||
|
// See http://bugs.openclonk.org/view.php?id=984.
|
||||||
|
StdMesh_tim_sort(&helpers[0], helpers.size());
|
||||||
|
|
||||||
|
std::vector<StdMeshFace> new_faces(faces.size());
|
||||||
|
switch(face_ordering)
|
||||||
{
|
{
|
||||||
// We need to evaluate the Z coordinate of the transformed vertex
|
case StdSubMeshInstance::FO_Fixed:
|
||||||
// (for all three vertices of the two faces), something like
|
assert(false);
|
||||||
// float z11 = (m_global_trans*m_vertices[face1.Vertices[0]]).z;
|
break;
|
||||||
// However we don't do the full matrix multiplication as we are
|
case StdSubMeshInstance::FO_FarthestToNearest:
|
||||||
// only interested in the Z coordinate of the result, also we are
|
for(unsigned int i = 0; i < faces.size(); ++i)
|
||||||
// not interested in the resulting normals.
|
new_faces[i] = faces[helpers[i].i];
|
||||||
return m_global_trans(2,0)*vtx.x + m_global_trans(2,1)*vtx.y + m_global_trans(2,2)*vtx.z + m_global_trans(2,3);
|
break;
|
||||||
|
case StdSubMeshInstance::FO_NearestToFarthest:
|
||||||
|
for(unsigned int i = 0; i < faces.size(); ++i)
|
||||||
|
new_faces[i] = faces[helpers[faces.size() - i - 1].i];
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator()(const StdMeshFace& face1, const StdMeshFace& face2) const
|
faces.swap(new_faces);
|
||||||
{
|
}
|
||||||
return compare(face1, face2) < 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int compare(const StdMeshFace& face1, const StdMeshFace& face2) const
|
|
||||||
{
|
|
||||||
// TODO: Need to apply attach matrix in case of attached meshes
|
|
||||||
switch (m_face_ordering)
|
|
||||||
{
|
|
||||||
case StdSubMeshInstance::FO_Fixed:
|
|
||||||
assert(false);
|
|
||||||
return 0;
|
|
||||||
case StdSubMeshInstance::FO_FarthestToNearest:
|
|
||||||
case StdSubMeshInstance::FO_NearestToFarthest:
|
|
||||||
{
|
|
||||||
float z11 = get_z(m_vertices[face1.Vertices[0]]);
|
|
||||||
float z12 = get_z(m_vertices[face1.Vertices[1]]);
|
|
||||||
float z13 = get_z(m_vertices[face1.Vertices[2]]);
|
|
||||||
float z21 = get_z(m_vertices[face2.Vertices[0]]);
|
|
||||||
float z22 = get_z(m_vertices[face2.Vertices[1]]);
|
|
||||||
float z23 = get_z(m_vertices[face2.Vertices[2]]);
|
|
||||||
|
|
||||||
float z1 = std::max(std::max(z11, z12), z13);
|
|
||||||
float z2 = std::max(std::max(z21, z22), z23);
|
|
||||||
|
|
||||||
if (m_face_ordering == StdSubMeshInstance::FO_FarthestToNearest)
|
|
||||||
return (z1 < z2 ? -1 : (z1 > z2 ? +1 : 0));
|
|
||||||
else
|
|
||||||
return (z2 < z1 ? -1 : (z2 > z1 ? +1 : 0));
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
assert(false);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Serialize a ValueProvider with StdCompiler
|
// Serialize a ValueProvider with StdCompiler
|
||||||
struct ValueProviderAdapt
|
struct ValueProviderAdapt
|
||||||
|
@ -231,13 +254,6 @@ namespace
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
StdMeshInstanceFaceOrderingCmpPred* g_pred = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int StdMeshFaceCmp(const StdMeshFace& face1, const StdMeshFace& face2)
|
|
||||||
{
|
|
||||||
return g_pred->compare(face1, face2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
StdMeshTransformation StdMeshTrack::GetTransformAt(float time) const
|
StdMeshTransformation StdMeshTrack::GetTransformAt(float time) const
|
||||||
|
@ -447,10 +463,12 @@ void StdSubMeshInstance::LoadFacesForCompletion(StdMeshInstance& instance, const
|
||||||
// however we can simply give an appropriate transformation matrix to the face ordering.
|
// however we can simply give an appropriate transformation matrix to the face ordering.
|
||||||
// At this point, all vertices are in the OGRE coordinate frame, and Z in OGRE equals
|
// At this point, all vertices are in the OGRE coordinate frame, and Z in OGRE equals
|
||||||
// Y in Clonk, so we are fine without additional transformation.
|
// Y in Clonk, so we are fine without additional transformation.
|
||||||
StdMeshInstanceFaceOrderingCmpPred pred(instance, *this, FO_FarthestToNearest, StdMeshMatrix::Identity());
|
const StdMeshVertex* vertices;
|
||||||
g_pred = &pred;
|
if(GetNumVertices() > 0)
|
||||||
StdMesh_tim_sort(&Faces[0], Faces.size());
|
vertices = &GetVertices()[0];
|
||||||
g_pred = NULL;
|
else
|
||||||
|
vertices = &instance.GetSharedVertices()[0];
|
||||||
|
SortFacesArray(vertices, Faces, FO_FarthestToNearest, StdMeshMatrix::Identity());
|
||||||
|
|
||||||
// Third: Only use the first few ones
|
// Third: Only use the first few ones
|
||||||
assert(submesh.GetNumFaces() >= 1);
|
assert(submesh.GetNumFaces() >= 1);
|
||||||
|
@ -1181,24 +1199,12 @@ void StdMeshInstance::ReorderFaces(StdMeshMatrix* global_trans)
|
||||||
StdSubMeshInstance& inst = *SubMeshInstances[i];
|
StdSubMeshInstance& inst = *SubMeshInstances[i];
|
||||||
if(inst.CurrentFaceOrdering != StdSubMeshInstance::FO_Fixed)
|
if(inst.CurrentFaceOrdering != StdSubMeshInstance::FO_Fixed)
|
||||||
{
|
{
|
||||||
StdMeshInstanceFaceOrderingCmpPred pred(*this, inst, inst.CurrentFaceOrdering, global_trans ? *global_trans : StdMeshMatrix::Identity());
|
const StdMeshVertex* vertices;
|
||||||
|
if(inst.GetNumVertices() > 0)
|
||||||
// The usage of timsort instead of std::sort at this point is twofold.
|
vertices = &inst.GetVertices()[0];
|
||||||
// First, it's faster in our case where the array is already sorted in
|
else
|
||||||
// many cases (remember this is called at least once a frame).
|
vertices = &GetSharedVertices()[0];
|
||||||
// And it's not just a bit faster either but a lot. I have measured
|
SortFacesArray(vertices, inst.Faces, inst.CurrentFaceOrdering, global_trans ? *global_trans : StdMeshMatrix::Identity());
|
||||||
// a factor of 7 on my system.
|
|
||||||
// Second, in our Windows autobuilds there is a crash within std::sort
|
|
||||||
// which is very hard to debug because it's hardly reproducible with
|
|
||||||
// anything other than the autobuilds (I tried hard). If the crash goes
|
|
||||||
// away with timsort then great, if not then maybe it's easier to debug
|
|
||||||
// since the code is in our tree.
|
|
||||||
|
|
||||||
//std::sort(inst.Faces.begin(), inst.Faces.end(), pred);
|
|
||||||
|
|
||||||
g_pred = &pred;
|
|
||||||
StdMesh_tim_sort(&inst.Faces[0], inst.Faces.size());
|
|
||||||
g_pred = NULL;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue