I was comparing the performance of intensive vector/matrix operations using Bullet's linear math library and Sony's SIMD Vectormath library, which ships in Bullet's extras folder.
In my machine (a 32bit Pentium 4) with SSE configuration I've found that the Bullet linear math is often faster (1.5 - 2 times faster) than Sony's SIMD Vectormath library, even if it claims that it is using SIMD.
Here is the code of the test demo:
Code: Select all
///Testfile to test differences between vectormath and Bullet LinearMath
#include "vectormath_aos.h"
#include "LinearMath/btTransform.h"
#include <stdio.h>
#include <stdlib.h>
#include "GL/glfw.h"
#define NUM_TESTS 100
#define NUM_OPERATIONS 10000
//In Bullet, a btVector3 can be used for both points and vectors.
//It is up to the user/developer to apply the right multiplication: btTransform for points, and btQuaternion or btMatrix3x3 for vectors.
void BulletTest()
{
btTransform tr;
tr.setIdentity();
tr.setOrigin(btVector3(10,0,0));
//initialization
btVector3 pointA(0,0,0);
btVector3 pointB,pointC,pointD,pointE;
btScalar x;
int i = NUM_OPERATIONS;
while(i--)
{
//transform over tr
pointB = tr(pointA);
//inverse transform
pointC = tr.inverse() * pointA;
//dot product
x = pointD.dot(pointE);
//square length
x = pointD.length2();
//length
x = pointD.length();
//in-place normalize pointD
pointD.normalize();
}
}
//Vectormath distinguishes between points (Point3) and vectors (Vector3).
void VectormathTest()
{
Vectormath::Aos::Transform3 tr = Vectormath::Aos::Transform3::identity();
tr.setTranslation(Vectormath::Aos::Vector3(10,0,0));
//initialization
Vectormath::Aos::Point3 pointA(0,0,0);
Vectormath::Aos::Point3 pointB,pointC,pointE;
Vectormath::Aos::Vector3 pointD;
btScalar x;
int i = NUM_OPERATIONS;
while(i--)
{
//transform over tr
pointB = tr * pointA;
//transform over tr
//inverse transform
pointC = Vectormath::Aos::inverse(tr) * pointA;
//dot product
x = Vectormath::Aos::dot(Vectormath::Aos::Vector3(pointD),Vectormath::Aos::Vector3(pointE));
//square length
x = Vectormath::Aos::lengthSqr(Vectormath::Aos::Vector3(pointD));
//length
x = Vectormath::Aos::length(Vectormath::Aos::Vector3(pointD));
//in-place normalize pointD
pointD = Vectormath::Aos::normalize(Vectormath::Aos::Vector3(pointD));
}
}
/// Entry point: times NUM_TESTS runs of each library's workload with GLFW's
/// timer and prints the elapsed time of each in milliseconds.
///
/// Fixes vs. the original:
///  - the first printf read `end_time` before it was ever assigned
///    (uninitialized read / garbage output) — removed;
///  - glfwGetTime() returns SECONDS, but the label said "ms" — the
///    difference is now scaled by 1000 so the label is truthful;
///  - each "Elapsed time" line now ends with '\n'.
int main()
{
    glfwInit();

    double start_time;
    double end_time;
    int i;

    printf("\n\nVectormath\n");
    start_time = glfwGetTime();
    i = NUM_TESTS;
    while (i--)
    {
        VectormathTest();
    }
    end_time = glfwGetTime();
    printf("Elapsed time : %f ms\n", (end_time - start_time) * 1000.0);

    printf("\n\nBullet Linearmath\n");
    start_time = glfwGetTime();
    i = NUM_TESTS;
    while (i--)
    {
        BulletTest();
    }
    end_time = glfwGetTime();
    printf("Elapsed time : %f ms\n", (end_time - start_time) * 1000.0);

    // Keep the console window open until the user presses Enter.
    getchar();
    glfwTerminate();
    return 0;
}
Really, I don't know if I'm doing something wrong, but these simple tests (which include only vector multiplications and matrix transformations) suggest that writing SIMD code isn't worth the effort!
I have done this kind of test before, some months ago, with other SIMD libraries such as nvec and SIMDx86, and with my personal macro-based implementation, only to find that those SIMD libraries are just bloated code.
With this I can verify that the KISS rule (keep it simple, stupid!) applies to performance-critical applications.
I once heard a very wise proverb from a programmer: "The compiler is always smarter than you... just TRUST IT and don't do anything stupid!" The compiler optimizes in a smarter way, and I can be reasonably sure it will emit SIMD code even if I didn't ask for it.