[692] | 1 | #ifndef __asm_math_H__
|
---|
| 2 | #define __asm_math_H__
|
---|
| 3 |
|
---|
| 4 | #include "OgrePrerequisites.h"
|
---|
| 5 |
|
---|
/*=============================================================================
 ASM math routines posted by davepermen et al on flipcode forums
=============================================================================*/
| 9 |
|
---|
// Single-precision pi and pi/2.
// Written as literals so that they are genuine compile-time constants: no
// run-time initialisation through atan(), no reliance on another header
// having declared atan(), and no implicit double -> float narrowing.
// On IEEE-754 targets these round to the same float values as the former
// expressions 4.0 * atan( 1.0 ) and 0.5 * pi.
const float pi = 3.14159265358979323846f;
const float half_pi = 1.57079632679489661923f;
| 12 |
|
---|
/*=============================================================================
 NO EXPLICIT RETURN REQUIRED FROM THESE METHODS!!
 The MSVC inline-assembly paths leave their result in ST(0) and contain no
 return statement; warning C4035 is disabled below for that reason.
=============================================================================*/
| 16 | #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
|
---|
| 17 | # pragma warning( push )
|
---|
| 18 | # pragma warning( disable: 4035 )
|
---|
| 19 | #endif
|
---|
| 20 |
|
---|
// Arc cosine of r, in radians.
//
// 32-bit MSVC: evaluated on the x87 FPU as
//     half_pi + atan( r / -sqrt( 1 - r * r ) )
// The result is left in ST(0) and there is intentionally no return
// statement on that path.
// Any other build (including one where the Ogre platform macros are not
// defined at all) calls acos().
//
// The defined() checks matter: in an #if, undefined identifiers evaluate to 0,
// so without them "0 == 0 && 0 == 0" would select the MSVC-only assembly.
float asm_arccos( float r ) {
#if defined( OGRE_COMPILER ) && defined( OGRE_ARCH_TYPE ) && \
    OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32

    const float asm_one = 1.f;
    const float asm_half_pi = half_pi;
    __asm {
        fld r               // st0 = r
        fld r               // st0 = r, st1 = r
        fmul r              // st0 = r * r
        fsubr asm_one       // st0 = 1 - r * r   (reverse subtract)
        fsqrt               // st0 = sqrt( 1 - r * r )
        fchs                // st0 = -sqrt( 1 - r * r )
        fdiv                // st0 = r / -sqrt( 1 - r * r )
        fld1                // push 1 as the denominator for fpatan
        fpatan              // st0 = atan( quotient / 1 )
        fadd asm_half_pi    // st0 = st0 + pi / 2
    } // result is returned in st0

#else

    return (float)acos( r );

#endif
}
| 47 |
|
---|
// Arc sine of r, in radians.
//
// 32-bit MSVC: evaluated on the x87 FPU as atan( r / sqrt( 1 - r * r ) ).
// The result is left in ST(0) and there is intentionally no return
// statement on that path.
// Any other build (including one where the Ogre platform macros are not
// defined at all) calls asin().
//
// The defined() checks matter: in an #if, undefined identifiers evaluate to 0,
// so without them "0 == 0 && 0 == 0" would select the MSVC-only assembly.
float asm_arcsin( float r ) {
#if defined( OGRE_COMPILER ) && defined( OGRE_ARCH_TYPE ) && \
    OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32

    const float asm_one = 1.f;
    __asm {
        fld r               // st0 = r
        fld r               // st0 = r, st1 = r
        fmul r              // st0 = r * r
        fsubr asm_one       // st0 = 1 - r * r   (reverse subtract)
        fsqrt               // st0 = sqrt( 1 - r * r )
        fdiv                // st0 = r / sqrt( 1 - r * r )
        fld1                // push 1 as the denominator for fpatan
        fpatan              // st0 = atan( quotient / 1 )
    } // result is returned in st0

#else

    return (float)asin( r );

#endif
}
| 72 |
|
---|
// Arc tangent of r, in radians.
//
// 32-bit MSVC: x87 fpatan with a denominator of 1; the result is left in
// ST(0) and there is intentionally no return statement on that path.
// Any other build (including one where the Ogre platform macros are not
// defined at all) calls atan().
//
// The defined() checks matter: in an #if, undefined identifiers evaluate to 0,
// so without them "0 == 0 && 0 == 0" would select the MSVC-only assembly.
float asm_arctan( float r ) {
#if defined( OGRE_COMPILER ) && defined( OGRE_ARCH_TYPE ) && \
    OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32

    __asm {
        fld r               // st0 = r
        fld1                // st0 = 1, st1 = r
        fpatan              // st0 = atan( r / 1 )
    } // result is returned in st0

#else

    return (float)atan( r );

#endif
}
| 90 |
|
---|
// Sine of r (r in radians).
//
// 32-bit MSVC: x87 fsin; the result is left in ST(0) and there is
// intentionally no return statement on that path.
// Any other build (including one where the Ogre platform macros are not
// defined at all) calls sin().
//
// The defined() checks matter: in an #if, undefined identifiers evaluate to 0,
// so without them "0 == 0 && 0 == 0" would select the MSVC-only assembly.
float asm_sin( float r ) {
#if defined( OGRE_COMPILER ) && defined( OGRE_ARCH_TYPE ) && \
    OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32

    __asm {
        fld r               // st0 = r
        fsin                // st0 = sin( r )
    } // result is returned in st0

#else

    return (float)sin( r );

#endif
}
| 107 |
|
---|
// Cosine of r (r in radians).
//
// 32-bit MSVC: x87 fcos; the result is left in ST(0) and there is
// intentionally no return statement on that path.
// Any other build (including one where the Ogre platform macros are not
// defined at all) calls cos().
//
// The defined() checks matter: in an #if, undefined identifiers evaluate to 0,
// so without them "0 == 0 && 0 == 0" would select the MSVC-only assembly.
float asm_cos( float r ) {
#if defined( OGRE_COMPILER ) && defined( OGRE_ARCH_TYPE ) && \
    OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32

    __asm {
        fld r               // st0 = r
        fcos                // st0 = cos( r )
    } // result is returned in st0

#else

    return (float)cos( r );

#endif
}
| 123 |
|
---|
// Tangent of r (r in radians).
//
// 32-bit MSVC: computed on the x87 FPU as sin( r ) / cos( r ); the result
// is left in ST(0) and there is intentionally no return statement on that
// path.
// Any other build (including one where the Ogre platform macros are not
// defined at all) calls tan().
//
// The defined() checks matter: in an #if, undefined identifiers evaluate to 0,
// so without them "0 == 0 && 0 == 0" would select the MSVC-only assembly.
float asm_tan( float r ) {
#if defined( OGRE_COMPILER ) && defined( OGRE_ARCH_TYPE ) && \
    OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32

    __asm {
        fld r               // st0 = r
        fsin                // st0 = sin( r )
        fld r               // st0 = r, st1 = sin( r )
        fcos                // st0 = cos( r )
        fdiv                // st0 = sin( r ) / cos( r )
    } // result is returned in st0

#else

    return (float)tan( r );

#endif
}
| 143 |
|
---|
// Square root: returns a such that a * a = r.
//
// 32-bit MSVC: x87 fsqrt; the result is left in ST(0) and there is
// intentionally no return statement on that path.
// Any other build (including one where the Ogre platform macros are not
// defined at all) calls sqrt().
//
// The defined() checks matter: in an #if, undefined identifiers evaluate to 0,
// so without them "0 == 0 && 0 == 0" would select the MSVC-only assembly.
float asm_sqrt( float r )
{
#if defined( OGRE_COMPILER ) && defined( OGRE_ARCH_TYPE ) && \
    OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32

    __asm {
        fld r               // st0 = r
        fsqrt               // st0 = sqrt( r )
    } // result is returned in st0

#else

    return (float)sqrt( r );

#endif
}
| 160 |
|
---|
// Reciprocal square root: returns 1 / a for a * a = r.
// -- Use this for Vector normalisation!!!
//
// 32-bit MSVC: x87 fsqrt followed by a divide of 1 by the root; the result
// is left in ST(0) and there is intentionally no return statement on that
// path.
// Any other build (including one where the Ogre platform macros are not
// defined at all) evaluates 1. / sqrt( r ).
//
// The defined() checks matter: in an #if, undefined identifiers evaluate to 0,
// so without them "0 == 0 && 0 == 0" would select the MSVC-only assembly.
float asm_rsq( float r )
{
#if defined( OGRE_COMPILER ) && defined( OGRE_ARCH_TYPE ) && \
    OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32

    __asm {
        fld1                // st0 = 1
        fld r               // st0 = r, st1 = 1
        fsqrt               // st0 = sqrt( r )
        fdiv                // st0 = 1 / sqrt( r )
    } // result is returned in st0

#else

    return (float)( 1. / sqrt( r ) );

#endif
}
| 180 |
|
---|
// Reciprocal square root, alternative version: returns 1 / a for a * a = r.
//
// 32-bit MSVC: an approximation. The raw bits of r are turned into an
// initial guess y = 0x5F400000 - ( bits >> 1 ), which is then refined once
// as y * ( 1.5 - 0.5 * r * y * y ). The parameter r is overwritten with y
// along the way. The result is left in ST(0) and there is intentionally no
// return statement on that path.
// Any other build (including one where the Ogre platform macros are not
// defined at all) evaluates 1. / sqrt( r ), i.e. it is not an
// approximation there.
//
// The defined() checks matter: in an #if, undefined identifiers evaluate to 0,
// so without them "0 == 0 && 0 == 0" would select the MSVC-only assembly.
float apx_rsq( float r ) {
#if defined( OGRE_COMPILER ) && defined( OGRE_ARCH_TYPE ) && \
    OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32

    const float asm_dot5 = 0.5f;
    const float asm_1dot5 = 1.5f;

    __asm {
        fld r               // st0 = r
        fmul asm_dot5       // st0 = 0.5 * r
        mov eax, r          // eax = raw bit pattern of r
        shr eax, 0x1        // eax = eax >> 1
        neg eax             // eax = -eax
        add eax, 0x5F400000 // eax = magic constant - ( bits >> 1 )
        mov r, eax          // r = initial guess y
        fmul r              // st0 = 0.5 * x * y
        fmul r              // st0 = 0.5 * x * y * y
        fsubr asm_1dot5     // st0 = 1.5 - 0.5 * x * y * y
        fmul r              // st0 = y * ( 1.5 - 0.5 * x * y * y )
    } // result is returned in st0

#else

    return (float)( 1. / sqrt( r ) );

#endif
}
| 210 |
|
---|
/* Very MS-specific: only compiled for 32-bit MSVC (see the #if below); on
   every other compiler/architecture InvSqrt does not exist.
Finally the best InvSqrt implementation?
Use for vector normalisation instead of 1/length() * x,y,z

Approximates 1 / sqrt( fValue ). An integer bit trick on the argument gives
an initial guess y, which is then refined by three steps of
    y = y * ( 1.5 - ( fValue / 2 ) * y * y )
on the x87 FPU. The result is left in st(0).

The function is naked, so there is no prologue/epilogue: the argument is
read from [esp + 4] and removed by "ret 4".
NOTE(review): scratch values are stored below the stack pointer
([esp - 8], [esp - 12]); confirm nothing can overwrite that area while the
routine runs.
*/
#if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32

__declspec(naked) float __fastcall InvSqrt(float fValue)
{
    __asm
    {
        // eax = ( 0xBE6EB508 - bits( fValue ) ) >> 1 : initial guess y
        mov eax, 0be6eb508h
        // [esp - 12] = 1.5f
        mov dword ptr[esp-12],03fc00000h
        sub eax, dword ptr[esp + 4]
        // lower the exponent field of the argument by one: [esp + 4] = h = fValue / 2
        sub dword ptr[esp+4], 800000h
        shr eax, 1
        // [esp - 8] = y
        mov dword ptr[esp - 8], eax

        // build h * y * y and compute a = 1.5 - h * y * y
        fld dword ptr[esp - 8]
        fmul st, st
        fld dword ptr[esp - 8]
        fxch st(1)
        fmul dword ptr[esp + 4]
        fld dword ptr[esp - 12]
        fld st(0)
        fsub st,st(2)

        // first step: y1 = y * a, and h * y1 * y1 for the next step
        fld st(1)
        fxch st(1)
        fmul st(3),st
        fmul st(3),st
        fmulp st(4),st
        // b = 1.5 - h * y1 * y1
        fsub st,st(2)

        // second step: y2 = y1 * b, and h * y2 * y2 for the last step
        fmul st(2),st
        fmul st(3),st
        fmulp st(2),st
        fxch st(1)
        // st0 = 1.5 - h * y2 * y2
        fsubp st(1),st

        // third step: st0 = y2 * ( 1.5 - h * y2 * y2 ); pop the argument and return
        fmulp st(1), st
        ret 4
    }
}

#endif
| 256 |
|
---|
| 257 | // returns a random number
|
---|
| 258 | FORCEINLINE float asm_rand()
|
---|
| 259 | {
|
---|
| 260 |
|
---|
| 261 | #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
|
---|
| 262 | #if 0
|
---|
| 263 | #if OGRE_COMP_VER >= 1300
|
---|
| 264 |
|
---|
| 265 | static unsigned __int64 q = time( NULL );
|
---|
| 266 |
|
---|
| 267 | _asm {
|
---|
| 268 | movq mm0, q
|
---|
| 269 |
|
---|
| 270 | // do the magic MMX thing
|
---|
| 271 | pshufw mm1, mm0, 0x1E
|
---|
| 272 | paddd mm0, mm1
|
---|
| 273 |
|
---|
| 274 | // move to integer memory location and free MMX
|
---|
| 275 | movq q, mm0
|
---|
| 276 | emms
|
---|
| 277 | }
|
---|
| 278 |
|
---|
| 279 | return float( q );
|
---|
| 280 | #endif
|
---|
| 281 | #else
|
---|
| 282 | // VC6 does not support pshufw
|
---|
| 283 | return float( rand() );
|
---|
| 284 | #endif
|
---|
| 285 | #else
|
---|
| 286 | // GCC etc
|
---|
| 287 |
|
---|
| 288 | return float( rand() );
|
---|
| 289 |
|
---|
| 290 | #endif
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | // returns the maximum random number
|
---|
| 294 | FORCEINLINE float asm_rand_max()
|
---|
| 295 | {
|
---|
| 296 |
|
---|
| 297 | #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
|
---|
| 298 | #if 0
|
---|
| 299 | #if OGRE_COMP_VER >= 1300
|
---|
| 300 |
|
---|
| 301 | return std::numeric_limits< unsigned __int64 >::max();
|
---|
| 302 | return 9223372036854775807.0f;
|
---|
| 303 | #endif
|
---|
| 304 | #else
|
---|
| 305 | // VC6 does not support unsigned __int64
|
---|
| 306 | return float( RAND_MAX );
|
---|
| 307 | #endif
|
---|
| 308 |
|
---|
| 309 | #else
|
---|
| 310 | // GCC etc
|
---|
| 311 | return float( RAND_MAX );
|
---|
| 312 |
|
---|
| 313 | #endif
|
---|
| 314 | }
|
---|
| 315 |
|
---|
// Natural logarithm: returns log2( r ) / log2( e ).
//
// 32-bit MSVC: an approximation built from the raw bits of r. The exponent
// field gives an integer part, the mantissa (re-biased into a float of its
// own) is fed through a quadratic polynomial, the two are added and the
// sum is scaled by 1 / log2( e ). The parameter r is overwritten along the
// way. The result is left in ST(0) and there is intentionally no return
// statement on that path.
// Any other build (including one where the Ogre platform macros are not
// defined at all) calls log().
//
// The defined() checks matter: in an #if, undefined identifiers evaluate to 0,
// so without them "0 == 0 && 0 == 0" would select the MSVC-only assembly.
float asm_ln( float r ) {
#if defined( OGRE_COMPILER ) && defined( OGRE_ARCH_TYPE ) && \
    OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32

    const float asm_1_div_log2_e = .693147180559f;
    const float asm_neg1_div_3 = -.33333333333333333333333333333f;
    const float asm_neg2_div_3 = -.66666666666666666666666666667f;
    const float asm_2 = 2.f;

    int log_2 = 0;

    __asm {
        // log_2 = ( ( r >> 0x17 ) & 0xFF ) - 0x80;
        mov eax, r
        sar eax, 0x17
        and eax, 0xFF
        sub eax, 0x80
        mov log_2, eax

        // r = ( r & 0x807fffff ) + 0x3f800000;
        mov ebx, r
        and ebx, 0x807FFFFF
        add ebx, 0x3F800000
        mov r, ebx

        // st0 = ( asm_neg1_div_3 * r + asm_2 ) * r + asm_neg2_div_3
        fld r
        fmul asm_neg1_div_3
        fadd asm_2
        fmul r
        fadd asm_neg2_div_3
        // st0 = polynomial + log_2
        fild log_2
        fadd
        // st0 = st0 * ( 1 / log2( e ) )
        fmul asm_1_div_log2_e
    } // result is returned in st0

#else

    return (float)log( r );

#endif
}
| 360 |
|
---|
| 361 | #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
|
---|
| 362 | # pragma warning( pop )
|
---|
| 363 | #endif
|
---|
| 364 |
|
---|
| 365 | #endif
|
---|