Commit f3a8858e authored by hybrid

Fixed inline assembler for gcc in irrMath. Removed some code using the irrMath functions.

git-svn-id: svn://svn.code.sf.net/p/irrlicht/code/trunk@894 dfc29bdd-3216-0410-991c-e03cc46cb475
parent 8b0f65e1
...@@ -232,27 +232,30 @@ namespace core ...@@ -232,27 +232,30 @@ namespace core
} }
#ifdef IRRLICHT_FAST_MATH
REALINLINE void clearFPUException () REALINLINE void clearFPUException ()
{ {
#ifdef feclearexcept
feclearexcept(FE_ALL_EXCEPT);
#elif defined(_MSC_VER)
__asm fnclex; __asm fnclex;
#elif defined(__GNUC__)
__asm__ __volatile__ ("fclex \n\t");
#else
# warn clearFPUException not supported.
#endif
} }
// comes from Nvidia
#if 1
REALINLINE f32 reciprocal_squareroot(const f32 x) REALINLINE f32 reciprocal_squareroot(const f32 x)
{ {
#ifdef IRRLICHT_FAST_MATH
// comes from Nvidia
#if 1
u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1; u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1;
f32 y = *(f32*)&tmp; f32 y = *(f32*)&tmp;
return y * (1.47f - 0.47f * x * y * y); return y * (1.47f - 0.47f * x * y * y);
} #else
#endif // an sse2 version
// an sse2 version
#if 0
REALINLINE f32 reciprocal_squareroot(const f32 x)
{
__asm __asm
{ {
movss xmm0, x movss xmm0, x
...@@ -260,117 +263,137 @@ namespace core ...@@ -260,117 +263,137 @@ namespace core
movss x, xmm0 movss x, xmm0
} }
return x; return x;
}
#endif #endif
#else // no fast math
return 1.f / sqrtf ( x );
#endif
}
REALINLINE f32 reciprocal ( const f32 f )
{
#ifdef IRRLICHT_FAST_MATH
//! i do not divide through 0.. (fpu expection) //! i do not divide through 0.. (fpu expection)
// instead set f to a high value to get a return value near zero.. // instead set f to a high value to get a return value near zero..
// -1000000000000.f.. is use minus to stay negative.. // -1000000000000.f.. is use minus to stay negative..
// must test's here (plane.normal dot anything ) checks on <= 0.f // must test's here (plane.normal dot anything ) checks on <= 0.f
REALINLINE f32 reciprocal ( const f32 f )
{
return 1.f / f; return 1.f / f;
//u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5; //u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5;
//return 1.f / FR ( x ); //return 1.f / FR ( x );
#else // no fast math
return 1.f / f;
#endif
} }
REALINLINE f32 reciprocal_approxim ( const f32 p ) REALINLINE f32 reciprocal_approxim ( const f32 p )
{ {
#ifdef IRRLICHT_FAST_MATH
register u32 x = 0x7F000000 - IR ( p ); register u32 x = 0x7F000000 - IR ( p );
const f32 r = FR ( x ); const f32 r = FR ( x );
return r * (2.0f - p * r); return r * (2.0f - p * r);
#else // no fast math
return 1.f / p;
#endif
} }
REALINLINE s32 floor32(f32 x) REALINLINE s32 floor32(f32 x)
{ {
#ifdef IRRLICHT_FAST_MATH
const f32 h = 0.5f; const f32 h = 0.5f;
s32 t; s32 t;
#if defined(_MSC_VER)
__asm __asm
{ {
fld x fld x
fsub h fsub h
fistp t fistp t
} }
#elif defined(__GNUC__)
__asm__ __volatile__ (
"fsub %2 \n\t"
"fistpl %0"
: "=m" (t)
: "t" (x), "f" (h)
: "st"
);
#else
# warn IRRLICHT_FAST_MATH not supported.
return (s32) floorf ( x );
#endif
return t; return t;
#else // no fast math
return (s32) floorf ( x );
#endif
} }
REALINLINE s32 ceil32 ( f32 x ) REALINLINE s32 ceil32 ( f32 x )
{ {
#ifdef IRRLICHT_FAST_MATH
const f32 h = 0.5f; const f32 h = 0.5f;
s32 t; s32 t;
#if defined(_MSC_VER)
__asm __asm
{ {
fld x fld x
fadd h fadd h
fistp t fistp t
} }
#elif defined(__GNUC__)
__asm__ __volatile__ (
"fadd %2 \n\t"
"fistpl %0 \n\t"
: "=m"(t)
: "t"(x), "f"(h)
: "st"
);
#else
# warn IRRLICHT_FAST_MATH not supported.
return (s32) ceilf ( x );
#endif
return t; return t;
#else // not fast math
return (s32) ceilf ( x );
#endif
} }
REALINLINE s32 round32(f32 x) REALINLINE s32 round32(f32 x)
{ {
#if defined(IRRLICHT_FAST_MATH)
s32 t; s32 t;
#if defined(_MSC_VER)
__asm __asm
{ {
fld x fld x
fistp t fistp t
} }
#elif defined(__GNUC__)
return t; __asm__ __volatile__ (
} "fistpl %0 \n\t"
: "=m"(t)
: "t"(x)
: "st"
);
#else #else
# warn IRRLICHT_FAST_MATH not supported.
REALINLINE void clearFPUException () return (s32) round(x);
{ #endif
} return t;
#else // no fast math
return (s32) round(x);
inline f32 reciprocal_squareroot(const f32 x) #endif
{
return 1.f / sqrtf ( x );
}
inline f32 reciprocal ( const f32 x )
{
return 1.f / x;
}
inline f32 reciprocal_approxim ( const f32 x )
{
return 1.f / x;
}
inline s32 floor32 ( f32 x )
{
return (s32) floorf ( x );
}
inline s32 ceil32 ( f32 x )
{
return (s32) ceilf ( x );
}
inline s32 round32 ( f32 x )
{
return (s32) ( x + 0.5f );
} }
inline f32 f32_max3(const f32 a, const f32 b, const f32 c) inline f32 f32_max3(const f32 a, const f32 b, const f32 c)
{ {
return a > b ? (a > c ? a : c) : (b > c ? b : c); return a > b ? (a > c ? a : c) : (b > c ? b : c);
...@@ -381,8 +404,6 @@ namespace core ...@@ -381,8 +404,6 @@ namespace core
return a < b ? (a < c ? a : c) : (b < c ? b : c); return a < b ? (a < c ? a : c) : (b < c ? b : c);
} }
#endif
inline f32 fract ( f32 x ) inline f32 fract ( f32 x )
{ {
return x - floorf ( x ); return x - floorf ( x );
...@@ -390,10 +411,9 @@ namespace core ...@@ -390,10 +411,9 @@ namespace core
inline f32 round ( f32 x ) inline f32 round ( f32 x )
{ {
return floorf ( x + 0.5f ); return ::round(x);
} }
} // end namespace core } // end namespace core
} // end namespace irr } // end namespace irr
......
...@@ -1957,7 +1957,7 @@ public: ...@@ -1957,7 +1957,7 @@ public:
virtual void setString(const char* text) virtual void setString(const char* text)
{ {
sscanf(text, "0x%x", (int*)(&Value)); sscanf(text, "0x%x", (unsigned int*)(&Value));
} }
virtual E_ATTRIBUTE_TYPE getType() const virtual E_ATTRIBUTE_TYPE getType() const
......
...@@ -61,7 +61,7 @@ void COpenGLExtensionHandler::dump() const ...@@ -61,7 +61,7 @@ void COpenGLExtensionHandler::dump() const
void COpenGLExtensionHandler::initExtensions(bool stencilBuffer) void COpenGLExtensionHandler::initExtensions(bool stencilBuffer)
{ {
const f32 ver = core::fast_atof(reinterpret_cast<const c8*>(glGetString(GL_VERSION))); const f32 ver = core::fast_atof(reinterpret_cast<const c8*>(glGetString(GL_VERSION)));
Version = core::floor32(ver)*100+core::ceil32((ver-floor(ver))*10.0f); Version = core::floor32(ver)*100+core::ceil32(core::fract(ver)*10.0f);
if ( Version >= 102) if ( Version >= 102)
os::Printer::log("OpenGL driver version is 1.2 or better.", ELL_INFORMATION); os::Printer::log("OpenGL driver version is 1.2 or better.", ELL_INFORMATION);
else else
...@@ -375,7 +375,7 @@ void COpenGLExtensionHandler::initExtensions(bool stencilBuffer) ...@@ -375,7 +375,7 @@ void COpenGLExtensionHandler::initExtensions(bool stencilBuffer)
else else
{ {
const f32 ver = core::fast_atof(reinterpret_cast<const c8*>(shaderVersion)); const f32 ver = core::fast_atof(reinterpret_cast<const c8*>(shaderVersion));
ShaderLanguageVersion = core::floor32(ver)*100+core::ceil32((ver-floor(ver))*10.0f); ShaderLanguageVersion = core::floor32(ver)*100+core::ceil32(core::fract(ver)*10.0f);
} }
} }
#endif #endif
......
...@@ -989,13 +989,6 @@ inline f32 CQ3LevelMesh::Blend( const f64 s[3], const f64 t[3], const tBSPVertex ...@@ -989,13 +989,6 @@ inline f32 CQ3LevelMesh::Blend( const f64 s[3], const f64 t[3], const tBSPVertex
return (f32) res; return (f32) res;
} }
//!helper function
inline s32 s32_min ( s32 a, s32 b)
{
s32 mask = (a - b) >> 31;
return (a & mask) | (b & ~mask);
}
void CQ3LevelMesh::S3DVertex2TCoords_64::copyto ( video::S3DVertex2TCoords &dest ) const void CQ3LevelMesh::S3DVertex2TCoords_64::copyto ( video::S3DVertex2TCoords &dest ) const
{ {
dest.Pos.X = core::round ( (f32) Pos.X ); dest.Pos.X = core::round ( (f32) Pos.X );
...@@ -1041,10 +1034,10 @@ void CQ3LevelMesh::copy ( S3DVertex2TCoords_64 * dest, const tBSPVertex * source ...@@ -1041,10 +1034,10 @@ void CQ3LevelMesh::copy ( S3DVertex2TCoords_64 * dest, const tBSPVertex * source
if ( vertexcolor ) if ( vertexcolor )
{ {
u32 a = s32_min ( source->color[3] * quake3::defaultModulate, 255 ); u32 a = core::s32_min ( source->color[3] * quake3::defaultModulate, 255 );
u32 r = s32_min ( source->color[0] * quake3::defaultModulate, 255 ); u32 r = core::s32_min ( source->color[0] * quake3::defaultModulate, 255 );
u32 g = s32_min ( source->color[1] * quake3::defaultModulate, 255 ); u32 g = core::s32_min ( source->color[1] * quake3::defaultModulate, 255 );
u32 b = s32_min ( source->color[2] * quake3::defaultModulate, 255 ); u32 b = core::s32_min ( source->color[2] * quake3::defaultModulate, 255 );
dest->Color.set ( a * 1.f/255.f, dest->Color.set ( a * 1.f/255.f,
r * 1.f/255.f, r * 1.f/255.f,
...@@ -1082,10 +1075,10 @@ inline void CQ3LevelMesh::copy ( video::S3DVertex2TCoords * dest, const tBSPVert ...@@ -1082,10 +1075,10 @@ inline void CQ3LevelMesh::copy ( video::S3DVertex2TCoords * dest, const tBSPVert
if ( vertexcolor ) if ( vertexcolor )
{ {
u32 a = s32_min ( source->color[3] * quake3::defaultModulate, 255 ); u32 a = core::s32_min ( source->color[3] * quake3::defaultModulate, 255 );
u32 r = s32_min ( source->color[0] * quake3::defaultModulate, 255 ); u32 r = core::s32_min ( source->color[0] * quake3::defaultModulate, 255 );
u32 g = s32_min ( source->color[1] * quake3::defaultModulate, 255 ); u32 g = core::s32_min ( source->color[1] * quake3::defaultModulate, 255 );
u32 b = s32_min ( source->color[2] * quake3::defaultModulate, 255 ); u32 b = core::s32_min ( source->color[2] * quake3::defaultModulate, 255 );
dest->Color.color = a << 24 | r << 16 | g << 8 | b; dest->Color.color = a << 24 | r << 16 | g << 8 | b;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment