reman3/Rayman_X/cpa/public/MTH/MTH_fopt.h

1168 lines
36 KiB
C

/* ##H_FILE#
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
FILE : MTH_fopt.h
MODULE : MTH (Common Mathematic Library)
DESCRIPTION : Optimization for PC and float
VERSION : MTH V5.0.13 / Alexandre LANGER [ALX] Ubi R&D / Add Comments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
/* ##INCLUDE#----------------------------------------------------------------------------
Includes Files
---------------------------------------------------------------------------------------*/
#include <assert.h>
#ifndef MTH_FOPT_H
#define MTH_FOPT_H
#include "acp_base.h"
#if defined(__cplusplus)
extern "C"
{
#endif
/* For DLL : */
#include "cpa_expt.h"
/* ##-###############################
## Compiler MTH Directive Summary
################################## */
/*
Enable optimization :
MTH_OPTIMIZE : Allow MTH optimization
Enable dynamic mathematic check :
MTH_CHECK : Checking In and Out mathematics validity
Levels of precisions/optimisations :
MTH_LOW : Faster
MTH_MEDIUM : Some optimisation
MTH_HIGH : Slow : no optimisation
Targets :
MTH_PC_DEV : PC machine
MTH_U64_DEV : Nintendo 64
MTH_UNKOWN_DEV : Unknown machine
Type of Real :
MTH_RealIsFloat : float
MTH_RealIsDouble : double
MTH_RealIsFixed16_16 : Fixed 16-16 (Not Yet Implemented)
Compilers :
VISUAL : Visual C
WATCOM : Watcom C
*/
/* ##MACRO#----------------------------------------------------------------------------
MACRO definition
---------------------------------------------------------------------------------------*/
/* ##-#######################################
## Link Macros with to Optimized versions
########################################## */
/* ****************** */
/* *** MTH_M_xDiv *** */
/* ****************** */
#define MTH_M_xQuickDiv MTH_M_xDivLow
#ifdef MTH_LOW
#define MTH_M_xDivOpt MTH_M_xDivLow
#endif /* MTH_LOW */
#ifdef MTH_MEDIUM
#define MTH_M_xDivOpt MTH_M_xDivMedium
#endif /* MTH_MEDIUM */
#ifdef MTH_HIGH
#define MTH_M_xDivOpt MTH_M_xDivHigh
#endif /* MTH_HIGH */
/* ****************** */
/* *** MTH_M_xSqrt ** */
/* ****************** */
#define MTH_M_xQuickSqrt MTH_M_xSqrtLow
#ifdef MTH_LOW
#define MTH_M_xSqrtOpt MTH_M_xSqrtLow
#endif /* MTH_LOW */
#ifdef MTH_MEDIUM
#define MTH_M_xSqrtOpt MTH_M_xSqrtMedium
#endif /* MTH_MEDIUM */
#ifdef MTH_HIGH
#define MTH_M_xSqrtOpt MTH_M_xSqrtHigh
#endif /* MTH_HIGH */
/* ********************* */
/* *** MTH_M_xInvSqrt ** */
/* ********************* */
#define MTH_M_xQuickInvSqrt MTH_M_xInvSqrtLow
#ifdef MTH_LOW
#define MTH_M_xInvSqrtOpt MTH_M_xInvSqrtLow
#endif /* MTH_LOW */
#ifdef MTH_MEDIUM
#define MTH_M_xInvSqrtOpt MTH_M_xInvSqrtMedium
#endif /* MTH_MEDIUM */
#ifdef MTH_HIGH
#define MTH_M_xInvSqrtOpt MTH_M_xInvSqrtHigh
#endif /* MTH_HIGH */
/* ***************** */
/* *** MTH_M_xInv ** */
/* ***************** */
#define MTH_M_xQuickInv MTH_M_xInvLow
#ifdef MTH_LOW
#define MTH_M_xInvOpt MTH_M_xInvLow
#endif /* MTH_LOW */
#ifdef MTH_MEDIUM
#define MTH_M_xInvOpt MTH_M_xInvMedium
#endif /* MTH_MEDIUM */
#ifdef MTH_HIGH
#define MTH_M_xInvOpt MTH_M_xInvHigh
#endif /* MTH_HIGH */
/* ***************************** */
/* *** MTH_M_xRealToLongRound ** */
/* ***************************** */
#ifdef MTH_LOW
#define MTH_M_xRealToLongRoundOpt MTH_M_xRealToLongRoundLow
#endif /* MTH_LOW */
#ifdef MTH_MEDIUM
#define MTH_M_xRealToLongRoundOpt MTH_M_xRealToLongRoundMedium
#endif /* MTH_MEDIUM */
#ifdef MTH_HIGH
#define MTH_M_xRealToLongRoundOpt MTH_M_xRealToLongRoundHigh
#endif /* MTH_HIGH */
/* ************************ */
/* *** MTH_M_xRealToLong ** */
/* ************************ */
#ifdef MTH_LOW
#define MTH_M_xRealToLongOpt MTH_M_xRealToLongLow
#endif /* MTH_LOW */
#ifdef MTH_MEDIUM
#define MTH_M_xRealToLongOpt MTH_M_xRealToLongMedium
#endif /* MTH_MEDIUM */
#ifdef MTH_HIGH
#define MTH_M_xRealToLongOpt MTH_M_xRealToLongHigh
#endif /* MTH_HIGH */
/* ##-#############################
## Optimization implementations
################################ */
/* ##-#############################
## RealToLongRound
################################ */
/* ##M==================================================================================
NAME : MTH_M_xRealToLongRoundLow
DESCRIPTION : Return long round of a real number : Low precision
              Object-like alias: the name expands to MTH_fn_fFloat2LongRoundOpt, so the
              caller's argument list is handed directly to that function.
              That function is only defined in this header when U64 is not defined.
INPUT : a : MTH_tdxReal
OUTPUT : round(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongRoundLow MTH_fn_fFloat2LongRoundOpt
/* ##M==================================================================================
NAME : MTH_M_xRealToLongRoundMedium
DESCRIPTION : Return long round of a real number : Medium precision
              Computed as (long)(2*a) - (long)(a), using the truncating C cast.
              The argument is fully parenthesized, so compound expressions such as
              (x + y) are rounded as a whole.
              WARNING : the argument is evaluated twice; do not pass an expression
              with side effects.
INPUT : a : MTH_tdxReal
OUTPUT : round(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongRoundMedium(A) ( (long)((A)*2.0F) - (long)(A) )
/* ##M==================================================================================
NAME : MTH_M_xRealToLongRoundHigh
DESCRIPTION : Return long round of a real number : High precision
              Computed as (long)(2*a) - (long)(a), using the truncating C cast.
              The argument is fully parenthesized, so compound expressions such as
              (x + y) are rounded as a whole.
              WARNING : the argument is evaluated twice; do not pass an expression
              with side effects.
INPUT : a : MTH_tdxReal
OUTPUT : round(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongRoundHigh(A) ( (long)((A)*2.0F) - (long)(A) )
/* ##-#############################
## RealToLong
################################ */
/* ##M==================================================================================
NAME : MTH_M_xRealToLongLow
DESCRIPTION : Return long cast of a real number : Low precision
              Object-like alias: the name expands to MTH_fn_fFloat2LongOpt, so the
              caller's argument list is handed directly to that function.
              That function is only defined in this header when U64 is not defined.
INPUT : a : MTH_tdxReal
OUTPUT : (long)(a) : long (truncation, per the MTH_fn_fFloat2LongOpt header)
=======================================================================================*/
#define MTH_M_xRealToLongLow MTH_fn_fFloat2LongOpt
/* ##M==================================================================================
NAME : MTH_M_xRealToLongMedium
DESCRIPTION : Convert a real number to long with the plain C cast : Medium precision
INPUT : a : MTH_tdxReal
OUTPUT : (long)(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongMedium(X) ((long)(X))
/* ##M==================================================================================
NAME : MTH_M_xRealToLongHigh
DESCRIPTION : Convert a real number to long with the plain C cast : High precision
INPUT : a : MTH_tdxReal
OUTPUT : (long)(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongHigh(X) ((long)(X))
/* ##-#############################
## SQRT
################################ */
/* ** Square root: one Low/Medium/High macro triple per target ** */
#if defined(U64)
/* ** U64 : all three levels call sqrtf() ** */
/* ##M==================================================================================
NAME : MTH_M_xSqrtLow
DESCRIPTION : Square root of a real number : Low precision/U64
INPUT : a : MTH_tdxReal
OUTPUT : sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xSqrtLow(A) (sqrtf(A))
/* ##M==================================================================================
NAME : MTH_M_xSqrtMedium
DESCRIPTION : Square root of a real number : Medium precision/U64
INPUT : a : MTH_tdxReal
OUTPUT : sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xSqrtMedium(A) (sqrtf(A))
/* ##M==================================================================================
NAME : MTH_M_xSqrtHigh
DESCRIPTION : Square root of a real number : High precision/U64
INPUT : a : MTH_tdxReal
OUTPUT : sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xSqrtHigh(A) (sqrtf(A))
#else /* NOT U64 => PC */
/* ** PC : Low uses the table function, Medium/High use sqrt() or the CHK function ** */
/* ##M==================================================================================
NAME : MTH_M_xSqrtLow
DESCRIPTION : Square root of a real number : Low precision/PC
              Forwards to MTH_fn_fSquareRootOpt.
INPUT : a : MTH_tdxReal
OUTPUT : sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xSqrtLow(A) (MTH_fn_fSquareRootOpt(A))
/* ##M==================================================================================
NAME : MTH_M_xSqrtMedium
DESCRIPTION : Square root of a real number : Medium precision/PC
              With MTH_CHECK defined, forwards to MTH_fn_xSqrtCHK; otherwise calls
              the double-precision sqrt() and casts the result back to float.
INPUT : a : MTH_tdxReal
OUTPUT : sqrt(a) : MTH_tdxReal
=======================================================================================*/
/* ##M==================================================================================
NAME : MTH_M_xSqrtHigh
DESCRIPTION : Square root of a real number : High precision/PC
              Same expansion as MTH_M_xSqrtMedium.
INPUT : a : MTH_tdxReal
OUTPUT : sqrt(a) : MTH_tdxReal
=======================================================================================*/
#if defined(MTH_CHECK)
#define MTH_M_xSqrtMedium(A) MTH_fn_xSqrtCHK(A)
#define MTH_M_xSqrtHigh(A) MTH_fn_xSqrtCHK(A)
#else /* NOT MTH_CHECK */
#define MTH_M_xSqrtMedium(A) ((float) sqrt((double) (A)))
#define MTH_M_xSqrtHigh(A) ((float) sqrt((double) (A)))
#endif /* MTH_CHECK OR NOT */
#endif /* U64 OR PC */
/* ##-#############################
## INV
################################ */
/* ** Inverse: one Low/Medium/High macro triple per target ** */
#if defined(U64)
/* ** U64 : all three levels are a plain division ** */
/* ##M==================================================================================
NAME : MTH_M_xInvLow
DESCRIPTION : Inverse of a real number : Low precision/U64
INPUT : a : MTH_tdxReal
OUTPUT : 1/a : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvLow(A) (MTH_C_ONE / (A))
/* ##M==================================================================================
NAME : MTH_M_xInvMedium
DESCRIPTION : Inverse of a real number : Medium precision/U64
INPUT : a : MTH_tdxReal
OUTPUT : 1/a : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvMedium(A) (MTH_C_ONE / (A))
/* ##M==================================================================================
NAME : MTH_M_xInvHigh
DESCRIPTION : Inverse of a real number : High precision/U64
INPUT : a : MTH_tdxReal
OUTPUT : 1/a : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvHigh(A) (MTH_C_ONE / (A))
#else /* NOT U64 => PC */
/* ** PC : Low uses the table function, Medium/High divide or use the CHK function ** */
/* ##M==================================================================================
NAME : MTH_M_xInvLow
DESCRIPTION : Inverse of a real number : Low precision/PC
              Forwards to MTH_fn_fInverseOpt.
INPUT : a : MTH_tdxReal
OUTPUT : 1/a : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvLow(A) (MTH_fn_fInverseOpt(A))
/* ##M==================================================================================
NAME : MTH_M_xInvMedium
DESCRIPTION : Inverse of a real number : Medium precision/PC
              With MTH_CHECK defined, forwards to MTH_fn_xInvCHK; otherwise divides
              MTH_C_ONE by the argument.
INPUT : a : MTH_tdxReal
OUTPUT : 1/a : MTH_tdxReal
=======================================================================================*/
/* ##M==================================================================================
NAME : MTH_M_xInvHigh
DESCRIPTION : Inverse of a real number : High precision/PC
              Same expansion as MTH_M_xInvMedium.
INPUT : a : MTH_tdxReal
OUTPUT : 1/a : MTH_tdxReal
=======================================================================================*/
#if defined(MTH_CHECK)
#define MTH_M_xInvMedium(A) MTH_fn_xInvCHK(A)
#define MTH_M_xInvHigh(A) MTH_fn_xInvCHK(A)
#else /* NOT MTH_CHECK */
#define MTH_M_xInvMedium(A) (MTH_C_ONE / (A))
#define MTH_M_xInvHigh(A) (MTH_C_ONE / (A))
#endif /* MTH_CHECK OR NOT */
#endif /* U64 OR PC */
/* ##-#############################
## DIV
################################ */
/* ** Division: one Low/Medium/High macro triple per target ** */
#if defined(U64)
/* ** U64 : all three levels are a plain division ** */
/* ##M==================================================================================
NAME : MTH_M_xDivLow
DESCRIPTION : Division of two real numbers : Low precision/U64
INPUT : a, b : 2 MTH_tdxReal
OUTPUT : a / b : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xDivLow(A, B) ((A) / (B))
/* ##M==================================================================================
NAME : MTH_M_xDivMedium
DESCRIPTION : Division of two real numbers : Medium precision/U64
INPUT : a, b : 2 MTH_tdxReal
OUTPUT : a / b : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xDivMedium(A, B) ((A) / (B))
/* ##M==================================================================================
NAME : MTH_M_xDivHigh
DESCRIPTION : Division of two real numbers : High precision/U64
INPUT : a, b : 2 MTH_tdxReal
OUTPUT : a / b : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xDivHigh(A, B) ((A) / (B))
#else /* NOT U64 => PC */
/* ** PC : Low multiplies by the table inverse, Medium/High divide or use the CHK function ** */
/* ##M==================================================================================
NAME : MTH_M_xDivLow
DESCRIPTION : Division of two real numbers : Low precision/PC
              Expands to MTH_M_xMul(a, MTH_fn_fInverseOpt(b)).
INPUT : a, b : 2 MTH_tdxReal
OUTPUT : a / b : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xDivLow(A, B) (MTH_M_xMul((A), (MTH_fn_fInverseOpt(B))))
/* ##M==================================================================================
NAME : MTH_M_xDivMedium
DESCRIPTION : Division of two real numbers : Medium precision/PC
              With MTH_CHECK defined, forwards to MTH_fn_xDivCHK; otherwise uses
              the C division operator.
INPUT : a, b : 2 MTH_tdxReal
OUTPUT : a / b : MTH_tdxReal
=======================================================================================*/
/* ##M==================================================================================
NAME : MTH_M_xDivHigh
DESCRIPTION : Division of two real numbers : High precision/PC
              Same expansion as MTH_M_xDivMedium.
INPUT : a, b : 2 MTH_tdxReal
OUTPUT : a / b : MTH_tdxReal
=======================================================================================*/
#if defined(MTH_CHECK)
#define MTH_M_xDivMedium(A, B) MTH_fn_xDivCHK(A, B)
#define MTH_M_xDivHigh(A, B) MTH_fn_xDivCHK(A, B)
#else /* NOT MTH_CHECK */
#define MTH_M_xDivMedium(A, B) ((A) / (B))
#define MTH_M_xDivHigh(A, B) ((A) / (B))
#endif /* MTH_CHECK OR NOT */
#endif /* U64 OR PC */
/* ##-#############################
## INV SQRT
################################ */
/* ** Inverse square root: one Low/Medium/High macro triple per target ** */
#if defined(U64)
/* ** U64 : all three levels divide MTH_C_ONE by sqrtf() ** */
/* ##M==================================================================================
NAME : MTH_M_xInvSqrtLow
DESCRIPTION : Inverse of the square root of a real number : Low precision/U64
INPUT : a : MTH_tdxReal
OUTPUT : 1/sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvSqrtLow(A) (MTH_C_ONE / sqrtf(A))
/* ##M==================================================================================
NAME : MTH_M_xInvSqrtMedium
DESCRIPTION : Inverse of the square root of a real number : Medium precision/U64
INPUT : a : MTH_tdxReal
OUTPUT : 1/sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvSqrtMedium(A) (MTH_C_ONE / sqrtf(A))
/* ##M==================================================================================
NAME : MTH_M_xInvSqrtHigh
DESCRIPTION : Inverse of the square root of a real number : High precision/U64
INPUT : a : MTH_tdxReal
OUTPUT : 1/sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvSqrtHigh(A) (MTH_C_ONE / sqrtf(A))
#else /* NOT U64 => PC */
/* ** PC : Low uses the table function, Medium/High use sqrt() or the CHK function ** */
/* ##M==================================================================================
NAME : MTH_M_xInvSqrtLow
DESCRIPTION : Inverse of the square root of a real number : Low precision/PC
              Forwards to MTH_fn_fInvSquareRootOpt.
INPUT : a : MTH_tdxReal
OUTPUT : 1/sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvSqrtLow(A) (MTH_fn_fInvSquareRootOpt(A))
/* ##M==================================================================================
NAME : MTH_M_xInvSqrtMedium
DESCRIPTION : Inverse of the square root of a real number : Medium precision/PC
              With MTH_CHECK defined, forwards to MTH_fn_xInvSqrtCHK; otherwise
              divides MTH_C_ONE by the float-cast result of the double sqrt().
INPUT : a : MTH_tdxReal
OUTPUT : 1/sqrt(a) : MTH_tdxReal
=======================================================================================*/
/* ##M==================================================================================
NAME : MTH_M_xInvSqrtHigh
DESCRIPTION : Inverse of the square root of a real number : High precision/PC
              Same expansion as MTH_M_xInvSqrtMedium.
INPUT : a : MTH_tdxReal
OUTPUT : 1/sqrt(a) : MTH_tdxReal
=======================================================================================*/
#if defined(MTH_CHECK)
#define MTH_M_xInvSqrtMedium(A) MTH_fn_xInvSqrtCHK(A)
#define MTH_M_xInvSqrtHigh(A) MTH_fn_xInvSqrtCHK(A)
#else /* NOT MTH_CHECK */
#define MTH_M_xInvSqrtMedium(A) (MTH_C_ONE / (float) sqrt((double) (A)))
#define MTH_M_xInvSqrtHigh(A) (MTH_C_ONE / (float) sqrt((double) (A)))
#endif /* MTH_CHECK OR NOT */
#endif /* U64 OR PC */
/* ##-#############################
## INIT
################################ */
/* ##M==================================================================================
NAME : MTH_M_vInit
DESCRIPTION : Initialization
              U64 : expands to an empty block.
              PC  : expands to a block calling MTH_fn_vInit(); the expansion is the
                    same for MTH_LOW, MTH_MEDIUM and MTH_HIGH. If none of the three
                    is defined, the macro is not defined.
INPUT : void
OUTPUT : void
=======================================================================================*/
#if defined(U64)
#define MTH_M_vInit() {}
#elif defined(MTH_LOW) || defined(MTH_MEDIUM) || defined(MTH_HIGH)
/* PC, any precision level */
#define MTH_M_vInit() { MTH_fn_vInit(); }
#endif /* U64 OR PC */
/* --------------------------------------------------------------------------------------- */
/* ------------ This part is not Public, but is here for __inline function --------------- */
/* ------------ !!! DO NOT USE THIS DIRECTLY !!! - FOR PC ONLY --------------------------- */
/* ------------ WARNING : these functions can disrupt other algorithms ------------------- */
/* --------------------------------------------------------------------------------------- */
/* #define MTH_PARANOID */ /* Uncomment this to armor code */
/* NOT for U64 */
#ifndef U64
/* ##-###########################
## Set Pentium FPU Precision
############################## */
/* ##F===================================================================================
NAME : MTH_fn_vSet24bitFPU
DESCRIPTION : Set Pentium FPU internal precision to 24bit
              Sequence: finit, store the FPU control word to a local, clear bits 8
              and 9 of it (mask 0fffffcffh), load it back.
              NOTE(review): bits 8-9 are presumably the x87 precision-control field
              (00b = 24-bit) -- confirm against the Intel manual.
              NOTE(review): finit runs first, so the caller's previous FPU state is
              presumably reset as a side effect -- see the WARNING banner above.
              With MTH_PARANOID defined, EAX is pushed/popped around the sequence.
INPUT : void
OUTPUT : void
=======================================================================================*/
INLINE void MTH_CALL MTH_fn_vSet24bitFPU(void)
{
long memvar; /* scratch memory used to read/modify/write the control word */
_asm{
#ifdef MTH_PARANOID
push eax
#endif /* MTH_PARANOID */
finit
fwait
fstcw [memvar] /* control word -> memvar */
fwait
mov eax, [memvar]
and eax, 0fffffcffh /* clear bits 8 and 9 */
mov [memvar], eax
fldcw [memvar] /* memvar -> control word */
fwait
#ifdef MTH_PARANOID
pop eax
#endif /* MTH_PARANOID */
}
}
/* ##F===================================================================================
NAME : MTH_fn_vSet53bitFPU
DESCRIPTION : Set Pentium FPU internal precision to 53bit
              Sequence: finit, store the FPU control word to a local, clear bit 8
              of it (mask 0fffffeffh), load it back.
              NOTE(review): only bit 8 is cleared, so the result depends on bit 9
              being set after finit -- presumably the x87 default; confirm against
              the Intel manual.
              NOTE(review): finit runs first, so the caller's previous FPU state is
              presumably reset as a side effect -- see the WARNING banner above.
              With MTH_PARANOID defined, EAX is pushed/popped around the sequence.
INPUT : void
OUTPUT : void
=======================================================================================*/
INLINE void MTH_CALL MTH_fn_vSet53bitFPU(void)
{
long memvar; /* scratch memory used to read/modify/write the control word */
_asm{
#ifdef MTH_PARANOID
push eax
#endif /* MTH_PARANOID */
finit
fwait
fstcw [memvar] /* control word -> memvar */
fwait
mov eax, [memvar]
and eax, 0fffffeffh /* clear bit 8 */
mov [memvar], eax
fldcw [memvar] /* memvar -> control word */
fwait
#ifdef MTH_PARANOID
pop eax
#endif /* MTH_PARANOID */
}
}
/* ##F===================================================================================
NAME : MTH_fn_vSet64bitFPU
DESCRIPTION : Set Pentium FPU internal precision to 64bit
              Sequence: finit, store the FPU control word to a local, AND it with
              0ffffffffh (which changes no bit), load it back. The control word
              written is therefore whatever finit left in place.
              NOTE(review): this presumably relies on finit selecting 64-bit
              precision by default -- confirm against the Intel manual.
              With MTH_PARANOID defined, EAX is pushed/popped around the sequence.
INPUT : void
OUTPUT : void
=======================================================================================*/
INLINE void MTH_CALL MTH_fn_vSet64bitFPU(void)
{
long memvar; /* scratch memory used to read/modify/write the control word */
_asm{
#ifdef MTH_PARANOID
push eax
#endif /* MTH_PARANOID */
finit
fwait
fstcw [memvar] /* control word -> memvar */
fwait
mov eax, [memvar]
and eax, 0ffffffffh /* all-ones mask: no bit is changed */
mov [memvar], eax
fldcw [memvar] /* memvar -> control word */
fwait
#ifdef MTH_PARANOID
pop eax
#endif /* MTH_PARANOID */
}
}
/* ##-###########################
## FloatToLong
############################## */
/* WARNING : These functions are experimental and for PC only.
Try them, and measure performance and precision loss before making your choice.
Both C and ASM versions are available; the C version is faster than the ASM version
with Visual C, because the compiler can optimise it together with the surrounding code.
*/
extern CPA_EXPORT double MTH_gs_dDecal; /* = 3.0F*pow(2, 51); */
/* ##F===================================================================================
NAME : MTH_fn_fFloat2LongRoundOptASM
DESCRIPTION : Return long round of a real number : asm version
              Do not use this function ! MTH_fn_fFloat2LongRoundOpt is more efficient.
              Adds MTH_gs_dDecal to the input on the FPU, stores the sum as a 64-bit
              double and returns the low 32 bits of that double.
              The sum is stored into a real double local: "fstp qword ptr" writes
              8 bytes, which must not land in a 4-byte long.
INPUT : a : float
OUTPUT : round(a) : long
=======================================================================================*/
INLINE long MTH_fn_fFloat2LongRoundOptASM (float f)
{
    double shifted; /* 8-byte destination for the 64-bit FPU store */
    _asm{
        fld dword ptr f
        fadd qword ptr MTH_gs_dDecal
        fstp qword ptr shifted
    }
    /* low dword of the shifted double holds the rounded integer (same idiom as
       MTH_fn_fFloat2LongRoundOpt) */
    return (*(long *)&shifted);
}
/* ##F===================================================================================
NAME : MTH_fn_fFloat2LongRoundOpt
DESCRIPTION : Return long round of a real number : C version (more optimized than asm)
              Adds MTH_gs_dDecal to the input and returns the first long-sized word
              of the resulting double's storage.
              Examples : 7.7 returns 8, -7.7 returns -8
INPUT : a : float (received as double)
OUTPUT : round(a) : long
=======================================================================================*/
INLINE long MTH_fn_fFloat2LongRoundOpt (double f)
{
    double shifted;
    MTH_M_vCHK(f);
    shifted = f + MTH_gs_dDecal;
    return (*(long *)&shifted);
}
/* ##F===================================================================================
NAME : MTH_fn_fFloat2LongOptASM
DESCRIPTION : Return long truncation of a real number : asm version
              Do not use this function ! MTH_fn_fFloat2LongOpt is more efficient.
              Works on the magnitude of f: subtracts m (0.49999999), adds
              MTH_gs_dDecal, stores the sum as a 64-bit double and takes its low
              32 bits; the result is negated when the sign bit of f was set.
              The sum is stored into a real double local: "fstp qword ptr" writes
              8 bytes, which must not land in a 4-byte long.
INPUT : a : float
OUTPUT : (long)(a) : long
=======================================================================================*/
INLINE long MTH_fn_fFloat2LongOptASM (float f)
{
    long res;
    double shifted; /* 8-byte destination for the 64-bit FPU store */
    double m=0.49999999;
    _asm{
        mov ebx, f
        test ebx, 0x80000000 /* sign bit set ? */
        jne F2L_negativ
        fld dword ptr f
        fsub qword ptr m
        fadd qword ptr MTH_gs_dDecal
        fstp qword ptr shifted
        mov ebx, dword ptr shifted /* low dword = integer result */
        mov res, ebx
        jmp F2L_fin
    F2L_negativ:
        and ebx, 0x7FFFFFFF /* clear the sign bit: work on |f| */
        mov f, ebx
        fld dword ptr f
        fsub qword ptr m
        fadd qword ptr MTH_gs_dDecal
        xor ebx, ebx
        fstp qword ptr shifted
        sub ebx, dword ptr shifted /* ebx = 0 - integer result */
        mov res, ebx
    F2L_fin :
    }
    return (res);
}
/* ##F===================================================================================
NAME : MTH_fn_fFloat2LongOpt
DESCRIPTION : Return long truncation of a real number : C version (more optimized than asm)
              Works on the magnitude of the input: subtracts m, adds MTH_gs_dDecal,
              reads the first long-sized word of the resulting double, then
              restores the sign.
              Examples : 7.7 returns 7, -7.7 returns -7
              Warning : Precision of 0.000001
INPUT : a : float (received as double)
OUTPUT : (long)(a) : long
=======================================================================================*/
INLINE long MTH_fn_fFloat2LongOpt (double f)
{
    static double m=0.499999; /* Do not add more 9s unless the output domain is reduced */
    long magnitude;
    int positive;
    MTH_M_vCHK(f);
    /* anything that is not strictly positive takes the negated path */
    positive = (f>0);
    if( !positive )
    {
        f= -f;
    }
    f-= m;
    f+= MTH_gs_dDecal;
    magnitude= *(long *)&f;
    return (positive ? magnitude : -magnitude);
}
/* ##-###########################
## TABLE BASED OPTIMIZATION
############################## */
/* WARNING : These functions are experimental and for PC only.
Performance depends on context!
Best performance is obtained for serial calculations (fewer cache misses).
Try them, and measure performance and precision loss before making your choice.
*/
extern CPA_EXPORT unsigned long MTH_g_a2048_fSquareRootTable[1024*2];
/* ##F===================================================================================
NAME : MTH_fn_fSquareRootOpt
DESCRIPTION : Return square root of a real number : optimized with table.
              Works on the bit pattern of f as an integer:
              - exponent : (f & 0x7F800000) + 0x3F800000, shifted right by one and
                masked again with 0x7F800000;
              - table    : (f & 0x00FFE000) >> 11 is used as a byte offset into
                MTH_g_a2048_fSquareRootTable; the 11 selected bits give a multiple
                of 4, i.e. one of the 2048 entries;
              - result   : new exponent + table entry, written to res_sqrt.
              The sign bit of f is excluded by both masks, so it does not affect
              the result.
              MTH_M_vCHK is applied to the input and to the result (macro defined
              outside this header).
              With MTH_PARANOID defined, EBX and EAX are pushed/popped.
INPUT : a : float
OUTPUT : sqrt(a) : float
=======================================================================================*/
INLINE float MTH_CALL MTH_fn_fSquareRootOpt(float f)
{
float res_sqrt;
MTH_M_vCHK(f);
/* f= (-1)^s.2^E.[1.M] */
_asm{
#ifdef MTH_PARANOID
push ebx
push eax
#endif /* MTH_PARANOID */
mov ebx,f /* ebx = f */
mov eax,f /* eax = f */
and ebx,0x7F800000 /* ebx = E */
and eax,0x00FFE000 /* eax = lowest bit of E and high bits of M */
add ebx,0x3F800000 /* ebx= E + (127<<23) */
shr ebx,1 /* ebx = ebx/2 */
shr eax,11 /* eax = byte offset into the table */
and ebx,0x7F800000 /* ebx = new E */
add ebx,dword ptr[MTH_g_a2048_fSquareRootTable+eax] /* Get from table */
mov dword ptr[res_sqrt],ebx
#ifdef MTH_PARANOID
pop eax
pop ebx
#endif /* MTH_PARANOID */
}
MTH_M_vCHK(res_sqrt);
return (res_sqrt);
}
extern CPA_EXPORT unsigned long MTH_g_a1024_fInverse[1024];
/* ##F===================================================================================
NAME : MTH_fn_fInverseOpt
DESCRIPTION : Return inverse of a real number : optimized with table.
              Works on the bit pattern of f as an integer:
              - sign/exponent : 0x7E800000 - (f & 0xFF800000);
              - table         : (f & 0x007FE000) >> 11 is used as a byte offset into
                MTH_g_a1024_fInverse; the 10 selected bits give a multiple of 4,
                i.e. one of the 1024 entries;
              - result        : the two values added, written to res_inv.
              MTH_M_vCHK is applied to the input and to the result (macro defined
              outside this header).
              With MTH_PARANOID defined, ECX, EBX and EAX are pushed/popped.
              NOTE(review): no special handling of f == 0 is visible here.
INPUT : a : float
OUTPUT : 1/a : float
=======================================================================================*/
INLINE float MTH_CALL MTH_fn_fInverseOpt(float f)
{
float res_inv;
MTH_M_vCHK(f);
_asm{
#ifdef MTH_PARANOID
push ecx
push ebx
push eax
#endif /* MTH_PARANOID */
mov ebx,f
mov ecx,0x7E800000 /* 1 Clocks */
mov eax,ebx
and eax,0x007FE000 /* 1 Clocks */ /* eax = high bits of the mantissa */
and ebx,0xFF800000 /* ebx = sign and exponent */
shr eax,11 /* 1 Clocks */ /* eax = byte offset into the table */
sub ecx,ebx /* ecx = 0x7E800000 - sign/exponent */
add ecx,dword ptr[MTH_g_a1024_fInverse + eax] /* 3 Clocks Exp_AGI_U_Pem:1 */
mov dword ptr[res_inv],ecx /* 1 Clocks Exp_Flow_Dep_ecx */
#ifdef MTH_PARANOID
pop eax
pop ebx
pop ecx
#endif /* MTH_PARANOID */
}
MTH_M_vCHK(res_inv);
return (res_inv);
}
extern CPA_EXPORT unsigned long MTH_g_a2048_fInvSquareRootTable[1024*2];
/* ##F===================================================================================
NAME : MTH_fn_fInvSquareRootOpt
DESCRIPTION : Return inverse square root of a real number : optimized with table.
              Works on the bit pattern of f as an integer:
              - exponent : 0xBD800000 (379 << 23) - (f & 0x7F800000), shifted right
                by one and masked with 0x7F800000;
              - table    : (f & 0x00FFE000) >> 11 is used as a byte offset into
                MTH_g_a2048_fInvSquareRootTable; the 11 selected bits give a
                multiple of 4, i.e. one of the 2048 entries;
              - result   : new exponent + table entry, written to res_invsqrt.
              The sign bit of f is excluded by both masks, so it does not affect
              the result.
              MTH_M_vCHK is applied to the input and to the result (macro defined
              outside this header).
              With MTH_PARANOID defined, ECX, EBX and EAX are pushed/popped.
INPUT : a : float
OUTPUT : 1/sqrt(a) : float
=======================================================================================*/
INLINE float MTH_CALL MTH_fn_fInvSquareRootOpt(float f)
{
float res_invsqrt;
/* To test the validity of this function :
float res_high;
char c_test[30];
*/
MTH_M_vCHK(f);
/* f= (-1)^s.2^E.[1.M] */
_asm{
#ifdef MTH_PARANOID
push ecx
push ebx
push eax
#endif /* MTH_PARANOID */
mov ecx,f /* ecx = f */
mov eax,f /* eax = f, to allow pairing */
and ecx,0x7F800000 /* ecx = E */
mov ebx,0xBD800000 /* ebx= 379 << 23 */
and eax,0x00FFE000 /* lowest bit of E (odd/even) & 10 high bits of M */
sub ebx,ecx /* ebx= (379 << 23) -E */
shr ebx,1 /* ebx= ebx/2 */
shr eax,11 /* eax = byte offset into the table */
and ebx,0x7F800000 /* ebx = new E */
add ebx,dword ptr[MTH_g_a2048_fInvSquareRootTable + eax] /* Get from table */
mov dword ptr[res_invsqrt],ebx
#ifdef MTH_PARANOID
pop eax
pop ebx
pop ecx
#endif /* MTH_PARANOID */
}
/* Validity test (disabled), to be used with the locals listed at the top :
res_high= 1.0F/sqrt((float)f);
assert( abs(1.0F-res_high/res_invsqrt) <0.001 );
*/
MTH_M_vCHK(res_invsqrt);
return (res_invsqrt);
}
/* ##F===================================================================================
NAME : MTH_fn_vInit
DESCRIPTION : Initialize tables
INPUT : void
OUTPUT : void
=======================================================================================*/
extern CPA_EXPORT void MTH_fn_vInit( void );
/* ======================================================================================
NAME : MTH_M_bEqualZeroAsm
DESCRIPTION : Tests if a float number equals zero
              Integer test on the bit pattern of A: the sign bit is cleared, then
              the value is compared to 1; the carry flag (set only when the
              remaining bits are all zero) is added to AL, which starts at 0.
              Both +0 and -0 therefore return 1.
INPUT : A : float
OUTPUT : A==0 : unsigned char
Author: Yann Le Tensorer sept 25,1998
Cycles: 4 on Pentium, 3 on Pentium II
Note: Faster than the "normal" way for both pentium & Pentium II
return type is unsigned char, so it is tested by visual C with "test al,al"
The function has no return statement: the result is the value left in AL.
=======================================================================================*/
/* Warning 4035 is disabled from this point on; it is not re-enabled in this header.
   NOTE(review): presumably C4035 is the "no return value" warning, silenced because
   the functions below return through AL -- confirm against the Visual C docs. */
#pragma warning( disable : 4035 )
static __inline unsigned char __fastcall MTH_M_bEqualZeroAsm( float A)
{
__asm
{
mov ecx,A
mov al,0 ;default return value is 0 (not equal to 0)
and ecx,0x7FFFFFFF ;very important: clear sign flag because negative zero also compares to zero...
cmp ecx,1 ;cf is set only if ecx=0 (we compare to 1, so cf is set if ecx is strictly smaller then 1)
adc al,0 ;add carry to al
;no need of return, default returned vaue is in al
}
}
/* ======================================================================================
NAME : MTH_M_bDifferentZeroAsm
DESCRIPTION : Tests if a float number is different from zero
              Integer test on the bit pattern of A: the sign bit is cleared, then
              the value is compared to 1; the carry flag (set only when the
              remaining bits are all zero) is subtracted from AL, which starts at 1.
              Both +0 and -0 therefore return 0.
              NOTE : the inline remark on "mov al,1" saying the default return value
              is 0 is stale; the default loaded into AL here is 1.
INPUT : A : float
OUTPUT : A!=0 : unsigned char
Author: Yann Le Tensorer sept 25,1998
Cycles: 4 on Pentium, 3 on Pentium II
Note: Faster than the "normal" way for both pentium & Pentium II
return type is unsigned char, so it is tested by visual C with "test al,al"
The function has no return statement: the result is the value left in AL
(warning 4035 is disabled earlier in this header).
=======================================================================================*/
static __inline unsigned char __fastcall MTH_M_bDifferentZeroAsm( float A)
{
__asm
{
mov ecx,A
mov al,1 ;default return value is 0 (not equal to 0)
and ecx,0x7FFFFFFF ;very important: clear sign flag because negative zero also compares to zero...
cmp ecx,1 ;cf is set only if ecx=0 (we compare to 1, so cf is set if ecx is strictly smaller then 1)
sbb al,0 ;sub carry to al
;no need of return, default returned vaue is in al
}
}
/* ======================================================================================
NAME : MTH_M_bGreaterZeroAsm
DESCRIPTION : Tests if a float number is strictly greater than zero
              Integer test on the bit pattern of A: 1 is subtracted, then the result
              is compared to 0x7fffffff; the carry flag is added to AL, which starts
              at 0. The function returns 1 exactly when the bit pattern of A lies in
              [1, 0x7fffffff], i.e. sign bit clear and not all bits zero.
              NOTE(review): positive NaN bit patterns fall in that range too, so
              they presumably return 1, unlike a C "A > 0" comparison.
INPUT : A : float
OUTPUT : A>0 : unsigned char
Author: Yann Le Tensorer sept 25,1998
Cycles: 3 on Pentium, 2 on Pentium II
Note: Faster than the "normal" way for both pentium & Pentium II
The function has no return statement: the result is the value left in AL
(warning 4035 is disabled earlier in this header).
=======================================================================================*/
static __inline unsigned char __fastcall MTH_M_bGreaterZeroAsm( float A)
{
__asm
{
mov ecx,A
mov al,0 ; default is less or equal zero
sub ecx,1 ;
cmp ecx,0x7fffffff ;key of the algorithm: cf is set if (A-1)<0x7fffffff
adc al,0 ; add cf to al
}
}
/* ======================================================================================
NAME : MTH_M_bLessEqualZeroAsm
DESCRIPTION : Tests if a float number is less than or equal to zero
              Integer test on the bit pattern of A: 1 is subtracted, then the result
              is compared to 0x7fffffff; the carry flag is subtracted from AL, which
              starts at 1. The function returns 0 exactly when the bit pattern of A
              lies in [1, 0x7fffffff] (sign bit clear and not all bits zero), and 1
              otherwise.
              NOTE : the inline remark on "sbb al,0" says "add"; the instruction
              subtracts the carry.
INPUT : A : float
OUTPUT : A<=0 : unsigned char
Author: Yann Le Tensorer sept 25,1998
Cycles: 4 on Pentium, 3 on Pentium II
Note: Faster than the "normal" way for both pentium & Pentium II
The function has no return statement: the result is the value left in AL
(warning 4035 is disabled earlier in this header).
=======================================================================================*/
static __inline unsigned char __fastcall MTH_M_bLessEqualZeroAsm( float A)
{
__asm
{
mov ecx,A
mov al,1 ; default is less or equal zero
sub ecx,1 ;
cmp ecx,0x7fffffff ;key of the algorithm: cf is set if (A-1)<0x7fffffff
sbb al,0 ; add cf to al
}
}
/* ======================================================================================
NAME : MTH_M_bLessZeroAsm
DESCRIPTION : Tests if a float number is strictly less than zero
              Integer test on the bit pattern of A: it is compared to 0x80000001 and
              the carry flag is subtracted from AL, which starts at 1. The function
              returns 1 exactly when the bit pattern of A is at or above 0x80000001,
              i.e. sign bit set with at least one other bit set; -0 (0x80000000)
              returns 0.
INPUT : A : float
OUTPUT : A<0 : unsigned char
Author: Yann Le Tensorer sept 25,1998
Cycles: 2 on Pentium,2 on Pentium II
Note: Faster than the "normal" way for both pentium & Pentium II
The function has no return statement: the result is the value left in AL
(warning 4035 is disabled earlier in this header).
=======================================================================================*/
static __inline unsigned char __fastcall MTH_M_bLessZeroAsm( float A)
{
__asm
{
mov al,1
cmp A,0x80000001
sbb al,0
}
}
/* ======================================================================================
NAME : MTH_M_bGreaterEqualZeroAsm
DESCRIPTION : Tests if a float number is greater than or equal to zero
              Integer test on the bit pattern of A: it is compared to 0x80000001 and
              the carry flag is added to AL, which starts at 0. The function returns
              1 exactly when the bit pattern of A is below 0x80000001, i.e. sign bit
              clear, or -0 (0x80000000).
INPUT : A : float
OUTPUT : A>=0 : unsigned char
Author: Yann Le Tensorer sept 25,1998
Cycles: 2 on Pentium,2 on Pentium II
Note: Faster than the "normal" way for both pentium & Pentium II
The function has no return statement: the result is the value left in AL
(warning 4035 is disabled earlier in this header).
=======================================================================================*/
static __inline unsigned char __fastcall MTH_M_bGreaterEqualZeroAsm( float A)
{
__asm
{
mov al,0
cmp A,0x80000001
adc al,0
}
}
#endif /* NOT U64 */
#if defined(__cplusplus)
}
#endif
#endif /* MTH_FOPT_H */