/* ##H_FILE# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FILE : MTH_fopt.h MODULE : MTH (Common Mathematic Library) DESCRIPTION : Optimization for PC and float VERSION : MTH V5.0.13 / Alexandre LANGER [ALX] Ubi R&D / Add Comments ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ /* ##INCLUDE#---------------------------------------------------------------------------- Includes Files ---------------------------------------------------------------------------------------*/ #include #ifndef MTH_FOPT_H #define MTH_FOPT_H #include "acp_base.h" #if defined(__cplusplus) extern "C" { #endif /* For DLL : */ #include "cpa_expt.h" /* ##-############################### ## Compiler MTH Directive Summary ################################## */ /* Enable optimization : MTH_OPTIMIZE : Allow MTH optimization Enable dynamic mathematic check : MTH_CHECK : Checking In and Out mathematics validity Levels of precisions/optimisations : MTH_LOW : Faster MTH_MEDIUM : Some optimisation MTH_HIGH : Slow : no optimisation Targets : MTH_PC_DEV : PC machine MTH_U64_DEV : Nitendo 64 MTH_UNKOWN_DEV : Unkown machine Type of Real : MTH_RealIsFloat : float MTH_RealIsDouble : double MTH_RealIsFixed16_16 : Fixed 16-16 (Not Yet Implemented Compilators : VISUAL : Visual C WATCOM : Watcom C */ /* ##MACRO#---------------------------------------------------------------------------- MACRO definition ---------------------------------------------------------------------------------------*/ /* ##-####################################### ## Link Macros with to Optimized versions ########################################## */ /* ****************** */ /* *** MTH_M_xDiv *** */ /* ****************** */ #define MTH_M_xQuickDiv MTH_M_xDivLow #ifdef MTH_LOW #define MTH_M_xDivOpt MTH_M_xDivLow #endif /* MTH_LOW */ #ifdef MTH_MEDIUM #define MTH_M_xDivOpt MTH_M_xDivMedium #endif /* MTH_MEDIUM */ #ifdef MTH_HIGH #define MTH_M_xDivOpt MTH_M_xDivHigh #endif /* MTH_HIGH */ /* ****************** */ /* *** MTH_M_xSqrt ** */ /* ****************** */ #define MTH_M_xQuickSqrt MTH_M_xSqrtLow #ifdef MTH_LOW #define MTH_M_xSqrtOpt MTH_M_xSqrtLow #endif /* MTH_LOW */ #ifdef MTH_MEDIUM #define MTH_M_xSqrtOpt MTH_M_xSqrtMedium #endif /* MTH_MEDIUM */ #ifdef MTH_HIGH #define MTH_M_xSqrtOpt MTH_M_xSqrtHigh #endif /* MTH_HIGH */ /* ********************* */ /* *** MTH_M_xInvSqrt ** */ /* ********************* */ #define MTH_M_xQuickInvSqrt MTH_M_xInvSqrtLow #ifdef MTH_LOW #define MTH_M_xInvSqrtOpt MTH_M_xInvSqrtLow #endif /* MTH_LOW */ #ifdef MTH_MEDIUM #define MTH_M_xInvSqrtOpt MTH_M_xInvSqrtMedium #endif /* MTH_MEDIUM */ #ifdef MTH_HIGH #define MTH_M_xInvSqrtOpt MTH_M_xInvSqrtHigh #endif /* MTH_HIGH */ /* ***************** */ /* *** MTH_M_xInv ** */ /* ***************** */ #define MTH_M_xQuickInv MTH_M_xInvLow #ifdef MTH_LOW #define MTH_M_xInvOpt MTH_M_xInvLow #endif /* MTH_LOW */ #ifdef MTH_MEDIUM #define MTH_M_xInvOpt MTH_M_xInvMedium #endif /* MTH_MEDIUM */ #ifdef MTH_HIGH #define MTH_M_xInvOpt MTH_M_xInvHigh #endif /* MTH_HIGH */ /* ***************************** */ /* *** MTH_M_xRealToLongRound ** */ /* ***************************** */ #ifdef MTH_LOW #define MTH_M_xRealToLongRoundOpt MTH_M_xRealToLongRoundLow #endif /* MTH_LOW */ #ifdef MTH_MEDIUM #define MTH_M_xRealToLongRoundOpt MTH_M_xRealToLongRoundMedium #endif /* MTH_MEDIUM */ #ifdef MTH_HIGH #define MTH_M_xRealToLongRoundOpt MTH_M_xRealToLongRoundHigh #endif /* MTH_HIGH */ /* ************************ */ /* *** MTH_M_xRealToLong ** */ /* ************************ */ #ifdef MTH_LOW #define MTH_M_xRealToLongOpt MTH_M_xRealToLongLow #endif /* MTH_LOW */ #ifdef MTH_MEDIUM #define MTH_M_xRealToLongOpt MTH_M_xRealToLongMedium #endif /* MTH_MEDIUM */ #ifdef MTH_HIGH #define MTH_M_xRealToLongOpt MTH_M_xRealToLongHigh #endif /* MTH_HIGH */ /* ##-############################# ## Optimization implementations ################################ */ /* ##-############################# ## RealToLongRound ################################ */ /* ##M================================================================================== NAME : MTH_M_xRealToLongRoundLow DESCRIPTION : Return long round of a real number : Low precision INPUT : a : MTH_tdxReal OUTPUT : round(a) : long =======================================================================================*/ #define MTH_M_xRealToLongRoundLow MTH_fn_fFloat2LongRoundOpt /* ##M================================================================================== NAME : MTH_M_xRealToLongRoundMedium DESCRIPTION : Return long round of a real number : Medium precision INPUT : a : MTH_tdxReal OUTPUT : round(a) : long =======================================================================================*/ #define MTH_M_xRealToLongRoundMedium(A) (long) ( (long)(A*2.0F)-(long)(A) ) /* ##M================================================================================== NAME : MTH_M_xRealToLongRoundHigh DESCRIPTION : Return long round of a real number : High precision INPUT : a : MTH_tdxReal OUTPUT : round(a) : long =======================================================================================*/ #define MTH_M_xRealToLongRoundHigh(A) (long) ( (long)(A*2.0F)-(long)(A) ) /* ##-############################# ## RealToLong ################################ */ /* ##M================================================================================== NAME : MTH_M_xRealToLongLow DESCRIPTION : Return long cast of a real number : Low precision INPUT : a : MTH_tdxReal OUTPUT : round(a) : long =======================================================================================*/ #define MTH_M_xRealToLongLow MTH_fn_fFloat2LongOpt /* ##M================================================================================== NAME : MTH_M_xRealToLongMedium DESCRIPTION : Return long cast of a real number : Medium precision INPUT : a : MTH_tdxReal OUTPUT : round(a) : long =======================================================================================*/ #define MTH_M_xRealToLongMedium(X) (long) ( X ) /* ##M================================================================================== NAME : MTH_M_xRealToLongHigh DESCRIPTION : Return long cast of a real number : High precision INPUT : a : MTH_tdxReal OUTPUT : round(a) : long =======================================================================================*/ #define MTH_M_xRealToLongHigh(X) (long) ( X ) /* ##-############################# ## SQRT ################################ */ /* ** U64 : */ #ifdef U64 /* ##M================================================================================== NAME : MTH_M_xSqrtLow DESCRIPTION : Return the square root of a real number : Low precision/U64 INPUT : a : MTH_tdxReal OUTPUT : sqrt(a) : MTH_tdxReal =======================================================================================*/ #define MTH_M_xSqrtLow( A ) \ ( sqrtf( A ) ) /* ##M================================================================================== NAME : MTH_M_xSqrtMedium DESCRIPTION : Return the square root of a real number : Medium precision/U64 INPUT : a : MTH_tdxReal OUTPUT : sqrt(a) : MTH_tdxReal =======================================================================================*/ #define MTH_M_xSqrtMedium( A ) \ ( sqrtf( A ) ) /* ##M================================================================================== NAME : MTH_M_xSqrtHigh DESCRIPTION : Return the square root of a real number : High precision/U64 INPUT : a : MTH_tdxReal OUTPUT : sqrt(a) : MTH_tdxReal =======================================================================================*/ #define MTH_M_xSqrtHigh( A ) \ ( sqrtf( A ) ) #else /* NOT U64 => PC */ /* ** PC : */ /* ##M================================================================================== NAME : MTH_M_xSqrtLow DESCRIPTION : Return the square root of a real number : Low precision/PC INPUT : a : MTH_tdxReal OUTPUT : sqrt(a) : MTH_tdxReal =======================================================================================*/ #define MTH_M_xSqrtLow( A ) \ ( MTH_fn_fSquareRootOpt( A ) ) /* ##M================================================================================== NAME : MTH_M_xSqrtMedium DESCRIPTION : Return the square root of a real number : Medium precision/PC INPUT : a : MTH_tdxReal OUTPUT : sqrt(a) : MTH_tdxReal =======================================================================================*/ #ifdef MTH_CHECK #define MTH_M_xSqrtMedium( A ) MTH_fn_xSqrtCHK( A ) #else /* NOT MTH_CHECK */ #define MTH_M_xSqrtMedium( A ) \ ( (float) sqrt((double) (A)) ) #endif /* MTH_CHECK OR NOT */ /* ##M================================================================================== NAME : MTH_M_xSqrtHigh DESCRIPTION : Return the square root of a real number : High precision/PC INPUT : a : MTH_tdxReal OUTPUT : sqrt(a) : MTH_tdxReal =======================================================================================*/ #ifdef MTH_CHECK #define MTH_M_xSqrtHigh( A ) MTH_fn_xSqrtCHK( A ) #else /* NOT MTH_CHECK */ #define MTH_M_xSqrtHigh( A ) \ ( (float) sqrt((double) (A)) ) #endif /* MTH_CHECK OR NOT */ #endif /* U64 OR PC */ /* ##-############################# ## INV ################################ */ /* ** U64 : */ #ifdef U64 /* ##M================================================================================== NAME : MTH_M_xInvLow DESCRIPTION : Return the inverse of a real number : Low precision/U64 INPUT : a : MTH_tdxReal OUTPUT : 1/a : MTH_tdxReal =======================================================================================*/ #define MTH_M_xInvLow( A ) \ ( MTH_C_ONE / (A) ) /* ##M================================================================================== NAME : MTH_M_xInvMedium DESCRIPTION : Return the inverse of a real number : Medium precision/U64 INPUT : a : MTH_tdxReal OUTPUT : 1/a : MTH_tdxReal =======================================================================================*/ #define MTH_M_xInvMedium( A ) \ ( MTH_C_ONE / (A) ) /* ##M================================================================================== NAME : MTH_M_xInvHigh DESCRIPTION : Return the inverse of a real number : High precision/U64 INPUT : a : MTH_tdxReal OUTPUT : 1/a : MTH_tdxReal =======================================================================================*/ #define MTH_M_xInvHigh( A ) \ ( MTH_C_ONE / (A) ) #else /* NOT U64 => PC */ /* ** PC : */ /* ##M================================================================================== NAME : MTH_M_xInvLow DESCRIPTION : Return the inverse of a real number : Low precision/PC INPUT : a : MTH_tdxReal OUTPUT : 1/a : MTH_tdxReal =======================================================================================*/ #define MTH_M_xInvLow( A ) \ ( MTH_fn_fInverseOpt( A ) ) /* ##M================================================================================== NAME : MTH_M_xInvMedium DESCRIPTION : Return the inverse of a real number : Medium precision/PC INPUT : a : MTH_tdxReal OUTPUT : 1/a : MTH_tdxReal =======================================================================================*/ #ifdef MTH_CHECK #define MTH_M_xInvMedium( A ) MTH_fn_xInvCHK( A ) #else /* NOT MTH_CHECK */ #define MTH_M_xInvMedium( A ) \ ( MTH_C_ONE / (A) ) #endif /* MTH_CHECK OR NOT */ /* ##M================================================================================== NAME : MTH_M_xInvHigh DESCRIPTION : Return the inverse of a real number : High precision/PC INPUT : a : MTH_tdxReal OUTPUT : 1/a : MTH_tdxReal =======================================================================================*/ #ifdef MTH_CHECK #define MTH_M_xInvHigh( A ) MTH_fn_xInvCHK( A ) #else /* NOT MTH_CHECK */ #define MTH_M_xInvHigh( A ) \ ( MTH_C_ONE / (A) ) #endif /* MTH_CHECK OR NOT */ #endif /* U64 OR PC */ /* ##-############################# ## DIV ################################ */ /* ** U64 : */ #ifdef U64 /* ##M================================================================================== NAME : MTH_M_xDivLow DESCRIPTION : Return the division of two real numbers : Low precision/U64 INPUT : a, b : 2 MTH_tdxReal OUTPUT : a / b : MTH_tdxReal =======================================================================================*/ #define MTH_M_xDivLow( A, B) \ ((A) / (B)) /* ##M================================================================================== NAME : MTH_M_xDivMedium DESCRIPTION : Return the division of two real numbers : Medium precision/U64 INPUT : a, b : 2 MTH_tdxReal OUTPUT : a / b : MTH_tdxReal =======================================================================================*/ #define MTH_M_xDivMedium( A, B) \ ((A) / (B)) /* ##M================================================================================== NAME : MTH_M_xDivHigh DESCRIPTION : Return the division of two real numbers : High precision/U64 INPUT : a, b : 2 MTH_tdxReal OUTPUT : a / b : MTH_tdxReal =======================================================================================*/ #define MTH_M_xDivHigh( A, B) \ ((A) / (B)) #else /* NOT U64 => PC */ /* ** PC : */ /* ##M================================================================================== NAME : MTH_M_xDivLow DESCRIPTION : Return the division of two real numbers : Low precision/PC INPUT : a, b : 2 MTH_tdxReal OUTPUT : a / b : MTH_tdxReal =======================================================================================*/ #define MTH_M_xDivLow( A, B ) \ ( MTH_M_xMul( (A), ( MTH_fn_fInverseOpt( B ) ) ) ) /* ##M================================================================================== NAME : MTH_M_xDivMedium DESCRIPTION : Return the division of two real numbers : Medium precision/PC INPUT : a, b : 2 MTH_tdxReal OUTPUT : a / b : MTH_tdxReal =======================================================================================*/ #ifdef MTH_CHECK #define MTH_M_xDivMedium( A, B) MTH_fn_xDivCHK( A, B) #else /* NOT MTH_CHECK */ #define MTH_M_xDivMedium( A, B) \ ((A) / (B)) #endif /* MTH_CHECK OR NOT */ /* ##M================================================================================== NAME : MTH_M_xDivHigh DESCRIPTION : Return the division of two real numbers : High precision/PC INPUT : a, b : 2 MTH_tdxReal OUTPUT : a / b : MTH_tdxReal =======================================================================================*/ #ifdef MTH_CHECK #define MTH_M_xDivHigh( A, B) MTH_fn_xDivCHK( A, B) #else /* NOT MTH_CHECK */ #define MTH_M_xDivHigh( A, B) \ ((A) / (B)) #endif /* MTH_CHECK OR NOT */ #endif /* U64 OR PC */ /* ##-############################# ## INV SQRT ################################ */ /* ** U64 : */ #ifdef U64 /* ##M================================================================================== NAME : MTH_M_xInvSqrtLow DESCRIPTION : Return the inverse of the square root of a real number : Low precision/U64 INPUT : a : MTH_tdxReal OUTPUT : 1/sqrt(a) : MTH_tdxReal =======================================================================================*/ #define MTH_M_xInvSqrtLow( A ) \ ( MTH_C_ONE / sqrtf( A ) ) /* ##M================================================================================== NAME : MTH_M_xInvSqrtMedium DESCRIPTION : Return the inverse of the square root of a real number : Medium precision/U64 INPUT : a : MTH_tdxReal OUTPUT : 1/sqrt(a) : MTH_tdxReal =======================================================================================*/ #define MTH_M_xInvSqrtMedium( A ) \ ( MTH_C_ONE / sqrtf( A ) ) /* ##M================================================================================== NAME : MTH_M_xInvSqrtHigh DESCRIPTION : Return the inverse of the square root of a real number : High precision/U64 INPUT : a : MTH_tdxReal OUTPUT : 1/sqrt(a) : MTH_tdxReal =======================================================================================*/ #define MTH_M_xInvSqrtHigh( A ) \ ( MTH_C_ONE / sqrtf( A ) ) #else /* NOT U64 => PC */ /* ** PC : */ /* ##M================================================================================== NAME : MTH_M_xInvSqrtLow DESCRIPTION : Return the inverse of the square root of a real number : Low precision/PC INPUT : a : MTH_tdxReal OUTPUT : 1/sqrt(a) : MTH_tdxReal =======================================================================================*/ #define MTH_M_xInvSqrtLow( A ) \ ( MTH_fn_fInvSquareRootOpt( A ) ) /* ##M================================================================================== NAME : MTH_M_xInvSqrtMedium DESCRIPTION : Return the inverse of the square root of a real number : Medium precision/PC INPUT : a : MTH_tdxReal OUTPUT : 1/sqrt(a) : MTH_tdxReal =======================================================================================*/ #ifdef MTH_CHECK #define MTH_M_xInvSqrtMedium( A ) MTH_fn_xInvSqrtCHK( A ) #else /* NOT MTH_CHECK */ #define MTH_M_xInvSqrtMedium( A ) \ ( MTH_C_ONE / (float) sqrt((double) (A)) ) #endif /* MTH_CHECK OR NOT */ /* ##M================================================================================== NAME : MTH_M_xInvSqrtHigh DESCRIPTION : Return the inverse of the square root of a real number : High precision/PC INPUT : a : MTH_tdxReal OUTPUT : 1/sqrt(a) : MTH_tdxReal =======================================================================================*/ #ifdef MTH_CHECK #define MTH_M_xInvSqrtHigh( A ) MTH_fn_xInvSqrtCHK( A ) #else /* NOT MTH_CHECK */ #define MTH_M_xInvSqrtHigh( A ) \ ( MTH_C_ONE / (float) sqrt((double) (A)) ) #endif /* MTH_CHECK OR NOT */ #endif /* U64 OR PC */ /* ##-############################# ## INIT ################################ */ /* ##M================================================================================== NAME : MTH_M_vInit DESCRIPTION : Initialization INPUT : void OUTPUT : void =======================================================================================*/ #ifdef U64 #define MTH_M_vInit() \ {} #else /* PC */ /* LOW : */ #ifdef MTH_LOW #define MTH_M_vInit() \ { MTH_fn_vInit(); \ } #endif /* MTH_LOW */ /* MEDIUM : */ #ifdef MTH_MEDIUM #define MTH_M_vInit() \ { MTH_fn_vInit(); \ } #endif /* MTH_MEDIUM */ /* HIGH : */ #ifdef MTH_HIGH #define MTH_M_vInit() \ { MTH_fn_vInit(); \ } #endif /* MTH_HIGH */ #endif /* U64 OR PC */ /* --------------------------------------------------------------------------------------- */ /* ------------ This part is not Public, but is here for __inline function --------------- */ /* ------------ !!! DO NOT USE THIS DIRECTLY !!! - FOR PC ONLY --------------------------- */ /* ------------ WARNING : these functions can disrupting others algoritms ---------------- */ /* --------------------------------------------------------------------------------------- */ /* #define MTH_PARANOID */ /* Uncomment this to armor code */ /* NOT for U64 */ #ifndef U64 /* ##-########################### ## Set Pentium FPU Precision ############################## */ /* ##F=================================================================================== NAME : MTH_fn_vSet24bitFPU DESCRIPTION : Set Pentium FPU internal precision to 24bit INPUT : void OUTPUT : void =======================================================================================*/ INLINE void MTH_CALL MTH_fn_vSet24bitFPU(void) { long memvar; _asm{ #ifdef MTH_PARANOID push eax #endif /* MTH_PARANOID */ finit fwait fstcw [memvar] fwait mov eax, [memvar] and eax, 0fffffcffh mov [memvar], eax fldcw [memvar] fwait #ifdef MTH_PARANOID pop eax #endif /* MTH_PARANOID */ } } /* ##F=================================================================================== NAME : MTH_fn_vSet53bitFPU DESCRIPTION : Set Pentium FPU internal precision to 53bit INPUT : void OUTPUT : void =======================================================================================*/ INLINE void MTH_CALL MTH_fn_vSet53bitFPU(void) { long memvar; _asm{ #ifdef MTH_PARANOID push eax #endif /* MTH_PARANOID */ finit fwait fstcw [memvar] fwait mov eax, [memvar] and eax, 0fffffeffh mov [memvar], eax fldcw [memvar] fwait #ifdef MTH_PARANOID pop eax #endif /* MTH_PARANOID */ } } /* ##F=================================================================================== NAME : MTH_fn_vSet64bitFPU DESCRIPTION : Set Pentium FPU internal precision to 64bit INPUT : void OUTPUT : void =======================================================================================*/ INLINE void MTH_CALL MTH_fn_vSet64bitFPU(void) { long memvar; _asm{ #ifdef MTH_PARANOID push eax #endif /* MTH_PARANOID */ finit fwait fstcw [memvar] fwait mov eax, [memvar] and eax, 0ffffffffh mov [memvar], eax fldcw [memvar] fwait #ifdef MTH_PARANOID pop eax #endif /* MTH_PARANOID */ } } /* ##-########################### ## FloatToLong ############################## */ /* WARNING : These functions are experimental and for PC Only. Try it, measure performance and degradation before making your choice. Both C and ASM version are avaible, C version is faster than ASM version for Visual C, because compiler can optimise with surround code. */ extern CPA_EXPORT double MTH_gs_dDecal; /* = 3.0F*pow(2, 51); */ /* ##F=================================================================================== NAME : MTH_fn_fFloat2LongRoundOptASM DESCRIPTION : Return long round of a real number : asm version Do not use this function ! MTH_fn_fFloat2LongRoundOpt is more efficient. INPUT : a : float OUTPUT : round(a) : long =======================================================================================*/ INLINE long MTH_fn_fFloat2LongRoundOptASM (float f) { long res; _asm{ fld dword ptr f fadd qword ptr MTH_gs_dDecal fstp qword ptr res } return (res); } /* ##F=================================================================================== NAME : MTH_fn_fFloat2LongRoundOpt DESCRIPTION : Return long round of a real number : C version (more optimized than asm) Exemples : 7.7 return 8, -7.7 return -8 INPUT : a : float OUTPUT : round(a) : long =======================================================================================*/ INLINE long MTH_fn_fFloat2LongRoundOpt (double f) { long res; MTH_M_vCHK(f); f+= MTH_gs_dDecal; res= *(long *)&f; return (res); } /* ##F=================================================================================== NAME : MTH_fn_fFloat2LongOptASM DESCRIPTION : Return long truncation of a real number : asm version Do not use this function ! MTH_fn_fFloat2LongOpt is more efficient. INPUT : a : float OUTPUT : (long)(a) : long =======================================================================================*/ INLINE long MTH_fn_fFloat2LongOptASM (float f) { long res; double m=0.49999999; _asm{ mov ebx, f test ebx, 0x80000000 jne F2L_negativ fld dword ptr f fsub qword ptr m fadd qword ptr MTH_gs_dDecal fstp qword ptr res jmp F2L_fin F2L_negativ: and ebx, 0x7FFFFFFF mov f, ebx fld dword ptr f fsub qword ptr m fadd qword ptr MTH_gs_dDecal xor ebx, ebx fstp qword ptr res sub ebx, res mov res, ebx F2L_fin : } return (res); } /* ##F=================================================================================== NAME : MTH_fn_fFloat2LongRoundOpt DESCRIPTION : Return long truncation of a real number : C version (more optimized than asm) Examples : 7.7 return 7, -7.7 return -7 Warning : Precision of 0.000001 INPUT : a : float OUTPUT : (long)(a) : long =======================================================================================*/ INLINE long MTH_fn_fFloat2LongOpt (double f) { long res; static double m=0.499999; /* Do not add `9` unless you decrease outpout domaine */ MTH_M_vCHK(f); if( f>0) { f-= m; f+= MTH_gs_dDecal; res= *(long *)&f; return (res); } else { f= -f; f-= m; f+= MTH_gs_dDecal; res= *(long *)&f; return (-res); } } /* ##-########################### ## TABLE BASED OPTIMIZATION ############################## */ /* WARNING : These functions are experimental and for PC Only. Performance depend of context ! Best performance for serial calculous (less cache miss). Try it, measure performance and degradation before making your choice. */ extern CPA_EXPORT unsigned long MTH_g_a2048_fSquareRootTable[1024*2]; /* ##F=================================================================================== NAME : MTH_fn_fSquareRootOpt DESCRIPTION : Return square root of a real number : optimized with table. INPUT : a : float OUTPUT : sqrt(a) : float =======================================================================================*/ INLINE float MTH_CALL MTH_fn_fSquareRootOpt(float f) { float res_sqrt; MTH_M_vCHK(f); /* f= (-1)^s.2^E.[1.M] */ _asm{ #ifdef MTH_PARANOID push ebx push eax #endif /* MTH_PARANOID */ mov ebx,f /* ebx = f */ mov eax,f /* eax = f */ and ebx,0x7F800000 /* ebx = E */ and eax,0x00FFE000 /* eax = 1st bit of E & M */ add ebx,0x3F800000 /* ebx= E + (127<<23) */ shr ebx,1 /* ebx = ebx/2 */ shr eax,11 /* eax = index on table */ and ebx,0x7F800000 /* ebx = new E */ add ebx,dword ptr[MTH_g_a2048_fSquareRootTable+eax] /* Get from table */ mov dword ptr[res_sqrt],ebx #ifdef MTH_PARANOID pop eax pop ebx #endif /* MTH_PARANOID */ } MTH_M_vCHK(res_sqrt); return (res_sqrt); } extern CPA_EXPORT unsigned long MTH_g_a1024_fInverse[1024]; /* ##F=================================================================================== NAME : MTH_fn_fInverseOpt DESCRIPTION : Return inverse of a real number : optimized with table. INPUT : a : float OUTPUT : 1/a : float =======================================================================================*/ INLINE float MTH_CALL MTH_fn_fInverseOpt(float f) { float res_inv; MTH_M_vCHK(f); _asm{ #ifdef MTH_PARANOID push ecx push ebx push eax #endif /* MTH_PARANOID */ mov ebx,f mov ecx,0x7E800000 /* 1 Clocks */ mov eax,ebx and eax,0x007FE000 /* 1 Clocks */ and ebx,0xFF800000 shr eax,11 /* 1 Clocks */ sub ecx,ebx add ecx,dword ptr[MTH_g_a1024_fInverse + eax] /* 3 Clocks Exp_AGI_U_Pem:1 */ mov dword ptr[res_inv],ecx /* 1 Clocks Exp_Flow_Dep_ecx */ #ifdef MTH_PARANOID pop eax pop ebx pop ecx #endif /* MTH_PARANOID */ } MTH_M_vCHK(res_inv); return (res_inv); } extern CPA_EXPORT unsigned long MTH_g_a2048_fInvSquareRootTable[1024*2]; /* ##F=================================================================================== NAME : MTH_fn_fInvSquareRootOpt DESCRIPTION : Return inverse square root of a real number : optimized with table. INPUT : a : float OUTPUT : 1/sqrt(a) : float =======================================================================================*/ INLINE float MTH_CALL MTH_fn_fInvSquareRootOpt(float f) { float res_invsqrt; /* To test vality of this function : float res_high; char c_test[30]; */ MTH_M_vCHK(f); /* f= (-1)^s.2^E.[1.M] */ _asm{ #ifdef MTH_PARANOID push ecx push ebx push eax #endif /* MTH_PARANOID */ mov ecx,f /* ecx = f */ mov eax,f /* eax = f, to allow pairing */ and ecx,0x7F800000 /* ecx = E */ mov ebx,0xBD800000 /* ebx= 379 << 23 */ and eax,0x00FFE000 /* 1st bit of E (odd/even) & 10 high of M */ sub ebx,ecx /* ebx= (379 << 23) -E */ shr ebx,1 /* ebx= ebx/2 */ shr eax,11 /* eax = index on table */ and ebx,0x7F800000 /* ebx = new E */ add ebx,dword ptr[MTH_g_a2048_fInvSquareRootTable + eax] /* Get from table */ mov dword ptr[res_invsqrt],ebx #ifdef MTH_PARANOID pop eax pop ebx pop ecx #endif /* MTH_PARANOID */ } /* res_high= 1.0F/sqrt((float)f); assert( abs(1.0F-res_high/res_invsqrt) <0.001 ); */ MTH_M_vCHK(res_invsqrt); return (res_invsqrt); } /* ##F=================================================================================== NAME : MTH_fn_vInit DESCRIPTION : Initialize tables INPUT : void OUTPUT : void =======================================================================================*/ extern CPA_EXPORT void MTH_fn_vInit( void ); /* ====================================================================================== NAME : MTH_M_bEqualZeroAsm DESCRIPTION : Tests if a float number equals zero INPUT : A : float OUTPUT : A==0 : unsigned char Author: Yann Le Tensorer sept 25,1998 Cycles: 4 on Pentium, 3 on Pentium II Note: Faster than the "normal" way for both pentium & Pentium II return type is unsigned char, so it is tested by visual C with "test al,al" =======================================================================================*/ #pragma warning( disable : 4035 ) static __inline unsigned char __fastcall MTH_M_bEqualZeroAsm( float A) { __asm { mov ecx,A mov al,0 ;default return value is 0 (not equal to 0) and ecx,0x7FFFFFFF ;very important: clear sign flag because negative zero also compares to zero... cmp ecx,1 ;cf is set only if ecx=0 (we compare to 1, so cf is set if ecx is strictly smaller then 1) adc al,0 ;add carry to al ;no need of return, default returned vaue is in al } } /* ====================================================================================== NAME : MTH_M_bDifferentZeroAsm DESCRIPTION : Tests if a float number is different from zero INPUT : A : float OUTPUT : A!=0 : unsigned char Author: Yann Le Tensorer sept 25,1998 Cycles: 4 on Pentium, 3 on Pentium II Note: Faster than the "normal" way for both pentium & Pentium II return type is unsigned char, so it is tested by visual C with "test al,al" =======================================================================================*/ static __inline unsigned char __fastcall MTH_M_bDifferentZeroAsm( float A) { __asm { mov ecx,A mov al,1 ;default return value is 0 (not equal to 0) and ecx,0x7FFFFFFF ;very important: clear sign flag because negative zero also compares to zero... cmp ecx,1 ;cf is set only if ecx=0 (we compare to 1, so cf is set if ecx is strictly smaller then 1) sbb al,0 ;sub carry to al ;no need of return, default returned vaue is in al } } /* ====================================================================================== NAME : MTH_M_bGreaterZeroAsm DESCRIPTION : Tests if a float number is strictly greater than zero INPUT : A : float OUTPUT : A>0 : unsigned char Author: Yann Le Tensorer sept 25,1998 Cycles: 3 on Pentium, 2 on Pentium II Note: Faster than the "normal" way for both pentium & Pentium II =======================================================================================*/ static __inline unsigned char __fastcall MTH_M_bGreaterZeroAsm( float A) { __asm { mov ecx,A mov al,0 ; default is less or equal zero sub ecx,1 ; cmp ecx,0x7fffffff ;key of the algorithm: cf is set if (A-1)<0x7fffffff adc al,0 ; add cf to al } } /* ====================================================================================== NAME : MTH_M_bLessOrEqualZeroAsm DESCRIPTION : Tests if a float number is strictly greater than zero INPUT : A : float OUTPUT : A<=0 : unsigned char Author: Yann Le Tensorer sept 25,1998 Cycles: 4 on Pentium, 3 on Pentium II Note: Faster than the "normal" way for both pentium & Pentium II =======================================================================================*/ static __inline unsigned char __fastcall MTH_M_bLessEqualZeroAsm( float A) { __asm { mov ecx,A mov al,1 ; default is less or equal zero sub ecx,1 ; cmp ecx,0x7fffffff ;key of the algorithm: cf is set if (A-1)<0x7fffffff sbb al,0 ; add cf to al } } /* ====================================================================================== NAME : MTH_M_bLessZeroAsm DESCRIPTION : Tests if a float number is strictly less than zero INPUT : A : float OUTPUT : A<0 : unsigned char Author: Yann Le Tensorer sept 25,1998 Cycles: 2 on Pentium,2 on Pentium II Note: Faster than the "normal" way for both pentium & Pentium II =======================================================================================*/ static __inline unsigned char __fastcall MTH_M_bLessZeroAsm( float A) { __asm { mov al,1 cmp A,0x80000001 sbb al,0 } } /* ====================================================================================== NAME : MTH_M_bGreaterEqualZeroAsm DESCRIPTION : Tests if a float number is strictly less than zero INPUT : A : float OUTPUT : A>=0 : unsigned char Author: Yann Le Tensorer sept 25,1998 Cycles: 2 on Pentium,2 on Pentium II Note: Faster than the "normal" way for both pentium & Pentium II =======================================================================================*/ static __inline unsigned char __fastcall MTH_M_bGreaterEqualZeroAsm( float A) { __asm { mov al,0 cmp A,0x80000001 adc al,0 } } #endif /* NOT U64 */ #if defined(__cplusplus) } #endif #endif /* MTH_FOPT_H */