/* ##H_FILE#
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   FILE        : MTH_fopt.h
   MODULE      : MTH (Common Mathematic Library)

   DESCRIPTION : Optimization for PC and float

   VERSION     : MTH V5.0.13 / Alexandre LANGER [ALX] Ubi R&D / Add Comments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
|
|
|
|
|
|
/* ##INCLUDE#----------------------------------------------------------------------------
|
|
Includes Files
|
|
---------------------------------------------------------------------------------------*/
|
|
|
|
#include <assert.h>
|
|
#ifndef MTH_FOPT_H
|
|
#define MTH_FOPT_H
|
|
|
|
#include "acp_base.h"
|
|
|
|
#if defined(__cplusplus)
|
|
extern "C"
|
|
{
|
|
#endif
|
|
|
|
/* For DLL : */
|
|
#include "cpa_expt.h"
|
|
|
|
|
|
/* ##-###############################
|
|
## Compiler MTH Directive Summary
|
|
################################## */
|
|
|
|
/*
    Enable optimization :
        MTH_OPTIMIZE         : Allow MTH optimization

    Enable dynamic mathematic check :
        MTH_CHECK            : Check the validity of mathematical inputs and outputs

    Levels of precision/optimization :
        MTH_LOW              : Faster
        MTH_MEDIUM           : Some optimization
        MTH_HIGH             : Slow : no optimization

    Targets :
        MTH_PC_DEV           : PC machine
        MTH_U64_DEV          : Nintendo 64
        MTH_UNKOWN_DEV       : Unknown machine

    Type of Real :
        MTH_RealIsFloat      : float
        MTH_RealIsDouble     : double
        MTH_RealIsFixed16_16 : Fixed 16-16 (Not Yet Implemented)

    Compilers :
        VISUAL               : Visual C
        WATCOM               : Watcom C
*/
|
|
|
|
|
|
|
|
|
|
|
|
/* ##MACRO#----------------------------------------------------------------------------
|
|
MACRO definition
|
|
---------------------------------------------------------------------------------------*/
|
|
|
|
/* ##-#######################################
|
|
## Link Macros with to Optimized versions
|
|
########################################## */
|
|
|
|
/* ************************************************************************************* */
/* *** "Quick" aliases : always bound to the Low (fastest) implementation,           *** */
/* *** whatever precision level the build selects.                                   *** */
/* ************************************************************************************* */
#define MTH_M_xQuickDiv             MTH_M_xDivLow
#define MTH_M_xQuickSqrt            MTH_M_xSqrtLow
#define MTH_M_xQuickInvSqrt         MTH_M_xInvSqrtLow
#define MTH_M_xQuickInv             MTH_M_xInvLow

/* ************************************************************************************* */
/* *** "Opt" aliases : bound to the implementation matching the precision level      *** */
/* *** chosen with MTH_LOW / MTH_MEDIUM / MTH_HIGH.                                  *** */
/* ***                                                                               *** */
/* *** Exactly one level is expected. If several are defined, the highest precision  *** */
/* *** wins (HIGH, then MEDIUM, then LOW) : this is the definition that previously   *** */
/* *** survived the chain of macro redefinitions, now obtained without redefining    *** */
/* *** anything. If no level is defined, no MTH_M_x...Opt macro is provided.         *** */
/* ************************************************************************************* */
#if defined(MTH_HIGH)

#define MTH_M_xDivOpt               MTH_M_xDivHigh
#define MTH_M_xSqrtOpt              MTH_M_xSqrtHigh
#define MTH_M_xInvSqrtOpt           MTH_M_xInvSqrtHigh
#define MTH_M_xInvOpt               MTH_M_xInvHigh
#define MTH_M_xRealToLongRoundOpt   MTH_M_xRealToLongRoundHigh
#define MTH_M_xRealToLongOpt        MTH_M_xRealToLongHigh

#elif defined(MTH_MEDIUM)

#define MTH_M_xDivOpt               MTH_M_xDivMedium
#define MTH_M_xSqrtOpt              MTH_M_xSqrtMedium
#define MTH_M_xInvSqrtOpt           MTH_M_xInvSqrtMedium
#define MTH_M_xInvOpt               MTH_M_xInvMedium
#define MTH_M_xRealToLongRoundOpt   MTH_M_xRealToLongRoundMedium
#define MTH_M_xRealToLongOpt        MTH_M_xRealToLongMedium

#elif defined(MTH_LOW)

#define MTH_M_xDivOpt               MTH_M_xDivLow
#define MTH_M_xSqrtOpt              MTH_M_xSqrtLow
#define MTH_M_xInvSqrtOpt           MTH_M_xInvSqrtLow
#define MTH_M_xInvOpt               MTH_M_xInvLow
#define MTH_M_xRealToLongRoundOpt   MTH_M_xRealToLongRoundLow
#define MTH_M_xRealToLongOpt        MTH_M_xRealToLongLow

#endif /* MTH_HIGH / MTH_MEDIUM / MTH_LOW */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ##-#############################
|
|
## Optimization implementations
|
|
################################ */
|
|
|
|
|
|
/* ##-#############################
|
|
## RealToLongRound
|
|
################################ */
|
|
|
|
/* ##M==================================================================================
NAME :        MTH_M_xRealToLongRoundLow
DESCRIPTION : Round a real number to the nearest long : Low precision.
              Plain alias of MTH_fn_fFloat2LongRoundOpt.
INPUT :       a : MTH_tdxReal
OUTPUT :      round(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongRoundLow           MTH_fn_fFloat2LongRoundOpt

/* ##M==================================================================================
NAME :        MTH_M_xRealToLongRoundMedium
DESCRIPTION : Round a real number to the nearest long : Medium precision.
              Computed as trunc(2a) - trunc(a).
              The argument is fully parenthesised, so compound expressions such as
              (x + y) are rounded as a whole.
              WARNING : A is evaluated twice, do not pass an expression with side effects.
INPUT :       a : MTH_tdxReal
OUTPUT :      round(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongRoundMedium(A)     ( (long)( (A)*2.0F ) - (long)(A) )

/* ##M==================================================================================
NAME :        MTH_M_xRealToLongRoundHigh
DESCRIPTION : Round a real number to the nearest long : High precision.
              Same formula as the Medium variant : trunc(2a) - trunc(a).
              WARNING : A is evaluated twice, do not pass an expression with side effects.
INPUT :       a : MTH_tdxReal
OUTPUT :      round(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongRoundHigh(A)       ( (long)( (A)*2.0F ) - (long)(A) )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ##-#############################
|
|
## RealToLong
|
|
################################ */
|
|
|
|
/* ##M==================================================================================
NAME :        MTH_M_xRealToLongLow
DESCRIPTION : Convert a real number to long by truncation : Low precision.
              Plain alias of MTH_fn_fFloat2LongOpt.
INPUT :       a : MTH_tdxReal
OUTPUT :      (long)(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongLow            MTH_fn_fFloat2LongOpt

/* ##M==================================================================================
NAME :        MTH_M_xRealToLongMedium
DESCRIPTION : Convert a real number to long with a plain C cast (truncation toward
              zero) : Medium precision.
              The expansion is wrapped in parentheses so that it behaves as a single
              operand in any surrounding expression.
INPUT :       a : MTH_tdxReal
OUTPUT :      (long)(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongMedium(X)      ( (long)(X) )

/* ##M==================================================================================
NAME :        MTH_M_xRealToLongHigh
DESCRIPTION : Convert a real number to long with a plain C cast (truncation toward
              zero) : High precision. Identical to the Medium variant.
INPUT :       a : MTH_tdxReal
OUTPUT :      (long)(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongHigh(X)        ( (long)(X) )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ##-#############################
|
|
## SQRT
|
|
################################ */
|
|
|
|
/* ** U64 : */
#ifdef U64

/* ##M==================================================================================
NAME :        MTH_M_xSqrtLow / MTH_M_xSqrtMedium / MTH_M_xSqrtHigh   (U64)
DESCRIPTION : Square root of a real number. On U64 the three precision levels all
              expand to the same sqrtf() call.
INPUT :       a : MTH_tdxReal
OUTPUT :      sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xSqrtLow( A )         ( sqrtf( A ) )
#define MTH_M_xSqrtMedium( A )      ( sqrtf( A ) )
#define MTH_M_xSqrtHigh( A )        ( sqrtf( A ) )

#else /* NOT U64 => PC */
/* ** PC : */

/* ##M==================================================================================
NAME :        MTH_M_xSqrtLow   (PC)
DESCRIPTION : Square root of a real number : Low precision.
              Delegates to the table-driven MTH_fn_fSquareRootOpt.
INPUT :       a : MTH_tdxReal
OUTPUT :      sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xSqrtLow( A )         ( MTH_fn_fSquareRootOpt( A ) )

/* ##M==================================================================================
NAME :        MTH_M_xSqrtMedium   (PC)
DESCRIPTION : Square root of a real number : Medium precision.
              With MTH_CHECK the call goes through MTH_fn_xSqrtCHK, otherwise the
              double-precision sqrt() is used and its result converted to float.
INPUT :       a : MTH_tdxReal
OUTPUT :      sqrt(a) : MTH_tdxReal
=======================================================================================*/
#ifdef MTH_CHECK
#define MTH_M_xSqrtMedium( A )      MTH_fn_xSqrtCHK( A )
#else /* NOT MTH_CHECK */
#define MTH_M_xSqrtMedium( A )      ( (float) sqrt((double) (A)) )
#endif /* MTH_CHECK OR NOT */

/* ##M==================================================================================
NAME :        MTH_M_xSqrtHigh   (PC)
DESCRIPTION : Square root of a real number : High precision.
              Same definitions as the Medium variant.
INPUT :       a : MTH_tdxReal
OUTPUT :      sqrt(a) : MTH_tdxReal
=======================================================================================*/
#ifdef MTH_CHECK
#define MTH_M_xSqrtHigh( A )        MTH_fn_xSqrtCHK( A )
#else /* NOT MTH_CHECK */
#define MTH_M_xSqrtHigh( A )        ( (float) sqrt((double) (A)) )
#endif /* MTH_CHECK OR NOT */

#endif /* U64 OR PC */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ##-#############################
|
|
## INV
|
|
################################ */
|
|
|
|
/* ** U64 : */
#ifdef U64

/* ##M==================================================================================
NAME :        MTH_M_xInvLow / MTH_M_xInvMedium / MTH_M_xInvHigh   (U64)
DESCRIPTION : Inverse of a real number. On U64 the three precision levels all
              expand to the same plain division MTH_C_ONE / a.
INPUT :       a : MTH_tdxReal
OUTPUT :      1/a : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvLow( A )          ( MTH_C_ONE / (A) )
#define MTH_M_xInvMedium( A )       ( MTH_C_ONE / (A) )
#define MTH_M_xInvHigh( A )         ( MTH_C_ONE / (A) )

#else /* NOT U64 => PC */
/* ** PC : */

/* ##M==================================================================================
NAME :        MTH_M_xInvLow   (PC)
DESCRIPTION : Inverse of a real number : Low precision.
              Delegates to the table-driven MTH_fn_fInverseOpt.
INPUT :       a : MTH_tdxReal
OUTPUT :      1/a : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvLow( A )          ( MTH_fn_fInverseOpt( A ) )

/* ##M==================================================================================
NAME :        MTH_M_xInvMedium   (PC)
DESCRIPTION : Inverse of a real number : Medium precision.
              With MTH_CHECK the call goes through MTH_fn_xInvCHK, otherwise it is
              the plain division MTH_C_ONE / a.
INPUT :       a : MTH_tdxReal
OUTPUT :      1/a : MTH_tdxReal
=======================================================================================*/
#ifdef MTH_CHECK
#define MTH_M_xInvMedium( A )       MTH_fn_xInvCHK( A )
#else /* NOT MTH_CHECK */
#define MTH_M_xInvMedium( A )       ( MTH_C_ONE / (A) )
#endif /* MTH_CHECK OR NOT */

/* ##M==================================================================================
NAME :        MTH_M_xInvHigh   (PC)
DESCRIPTION : Inverse of a real number : High precision.
              Same definitions as the Medium variant.
INPUT :       a : MTH_tdxReal
OUTPUT :      1/a : MTH_tdxReal
=======================================================================================*/
#ifdef MTH_CHECK
#define MTH_M_xInvHigh( A )         MTH_fn_xInvCHK( A )
#else /* NOT MTH_CHECK */
#define MTH_M_xInvHigh( A )         ( MTH_C_ONE / (A) )
#endif /* MTH_CHECK OR NOT */

#endif /* U64 OR PC */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ##-#############################
|
|
## DIV
|
|
################################ */
|
|
|
|
/* ** U64 : */
#ifdef U64

/* ##M==================================================================================
NAME :        MTH_M_xDivLow / MTH_M_xDivMedium / MTH_M_xDivHigh   (U64)
DESCRIPTION : Division of two real numbers. On U64 the three precision levels all
              expand to the same plain division a / b.
INPUT :       a, b : 2 MTH_tdxReal
OUTPUT :      a / b : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xDivLow( A, B)        ((A) / (B))
#define MTH_M_xDivMedium( A, B)     ((A) / (B))
#define MTH_M_xDivHigh( A, B)       ((A) / (B))

#else /* NOT U64 => PC */
/* ** PC : */

/* ##M==================================================================================
NAME :        MTH_M_xDivLow   (PC)
DESCRIPTION : Division of two real numbers : Low precision.
              Computed as a * (1/b), the inverse coming from the table-driven
              MTH_fn_fInverseOpt.
INPUT :       a, b : 2 MTH_tdxReal
OUTPUT :      a / b : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xDivLow( A, B )       ( MTH_M_xMul( (A), ( MTH_fn_fInverseOpt( B ) ) ) )

/* ##M==================================================================================
NAME :        MTH_M_xDivMedium   (PC)
DESCRIPTION : Division of two real numbers : Medium precision.
              With MTH_CHECK the call goes through MTH_fn_xDivCHK, otherwise it is
              the plain division a / b.
INPUT :       a, b : 2 MTH_tdxReal
OUTPUT :      a / b : MTH_tdxReal
=======================================================================================*/
#ifdef MTH_CHECK
#define MTH_M_xDivMedium( A, B)     MTH_fn_xDivCHK( A, B)
#else /* NOT MTH_CHECK */
#define MTH_M_xDivMedium( A, B)     ((A) / (B))
#endif /* MTH_CHECK OR NOT */

/* ##M==================================================================================
NAME :        MTH_M_xDivHigh   (PC)
DESCRIPTION : Division of two real numbers : High precision.
              Same definitions as the Medium variant.
INPUT :       a, b : 2 MTH_tdxReal
OUTPUT :      a / b : MTH_tdxReal
=======================================================================================*/
#ifdef MTH_CHECK
#define MTH_M_xDivHigh( A, B)       MTH_fn_xDivCHK( A, B)
#else /* NOT MTH_CHECK */
#define MTH_M_xDivHigh( A, B)       ((A) / (B))
#endif /* MTH_CHECK OR NOT */

#endif /* U64 OR PC */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ##-#############################
|
|
## INV SQRT
|
|
################################ */
|
|
|
|
/* ** U64 : */
#ifdef U64

/* ##M==================================================================================
NAME :        MTH_M_xInvSqrtLow / MTH_M_xInvSqrtMedium / MTH_M_xInvSqrtHigh   (U64)
DESCRIPTION : Inverse of the square root of a real number. On U64 the three precision
              levels all expand to MTH_C_ONE / sqrtf(a).
INPUT :       a : MTH_tdxReal
OUTPUT :      1/sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvSqrtLow( A )      ( MTH_C_ONE / sqrtf( A ) )
#define MTH_M_xInvSqrtMedium( A )   ( MTH_C_ONE / sqrtf( A ) )
#define MTH_M_xInvSqrtHigh( A )     ( MTH_C_ONE / sqrtf( A ) )

#else /* NOT U64 => PC */
/* ** PC : */

/* ##M==================================================================================
NAME :        MTH_M_xInvSqrtLow   (PC)
DESCRIPTION : Inverse of the square root of a real number : Low precision.
              Delegates to the table-driven MTH_fn_fInvSquareRootOpt.
INPUT :       a : MTH_tdxReal
OUTPUT :      1/sqrt(a) : MTH_tdxReal
=======================================================================================*/
#define MTH_M_xInvSqrtLow( A )      ( MTH_fn_fInvSquareRootOpt( A ) )

/* ##M==================================================================================
NAME :        MTH_M_xInvSqrtMedium   (PC)
DESCRIPTION : Inverse of the square root of a real number : Medium precision.
              With MTH_CHECK the call goes through MTH_fn_xInvSqrtCHK, otherwise it
              is MTH_C_ONE divided by the double-precision sqrt() converted to float.
INPUT :       a : MTH_tdxReal
OUTPUT :      1/sqrt(a) : MTH_tdxReal
=======================================================================================*/
#ifdef MTH_CHECK
#define MTH_M_xInvSqrtMedium( A )   MTH_fn_xInvSqrtCHK( A )
#else /* NOT MTH_CHECK */
#define MTH_M_xInvSqrtMedium( A )   ( MTH_C_ONE / (float) sqrt((double) (A)) )
#endif /* MTH_CHECK OR NOT */

/* ##M==================================================================================
NAME :        MTH_M_xInvSqrtHigh   (PC)
DESCRIPTION : Inverse of the square root of a real number : High precision.
              Same definitions as the Medium variant.
INPUT :       a : MTH_tdxReal
OUTPUT :      1/sqrt(a) : MTH_tdxReal
=======================================================================================*/
#ifdef MTH_CHECK
#define MTH_M_xInvSqrtHigh( A )     MTH_fn_xInvSqrtCHK( A )
#else /* NOT MTH_CHECK */
#define MTH_M_xInvSqrtHigh( A )     ( MTH_C_ONE / (float) sqrt((double) (A)) )
#endif /* MTH_CHECK OR NOT */

#endif /* U64 OR PC */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ##-#############################
|
|
## INIT
|
|
################################ */
|
|
|
|
/* ##M==================================================================================
NAME :        MTH_M_vInit
DESCRIPTION : Initialization of the MTH optimizations.
              U64 : expands to an empty block.
              PC  : expands to a block calling MTH_fn_vInit(); the macro is only
                    provided when MTH_LOW, MTH_MEDIUM or MTH_HIGH is defined, and the
                    three levels expand to the same text.
              NOTE : the expansion is a brace block, not an expression. Written as
                     "MTH_M_vInit();" directly before an "else" it will not compile;
                     wrap the call in its own braces there.
INPUT :       void
OUTPUT :      void
=======================================================================================*/
#ifdef U64

#define MTH_M_vInit()       {}

#else /* PC */

/* LOW : */
#ifdef MTH_LOW
#define MTH_M_vInit()       { MTH_fn_vInit(); }
#endif /* MTH_LOW */

/* MEDIUM : */
#ifdef MTH_MEDIUM
#define MTH_M_vInit()       { MTH_fn_vInit(); }
#endif /* MTH_MEDIUM */

/* HIGH : */
#ifdef MTH_HIGH
#define MTH_M_vInit()       { MTH_fn_vInit(); }
#endif /* MTH_HIGH */

#endif /* U64 OR PC */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* --------------------------------------------------------------------------------------- */
|
|
/* ------------ This part is not Public, but is here for __inline function --------------- */
|
|
/* ------------ !!! DO NOT USE THIS DIRECTLY !!! - FOR PC ONLY --------------------------- */
|
|
/* ------------ WARNING : these functions can disrupting others algoritms ---------------- */
|
|
/* --------------------------------------------------------------------------------------- */
|
|
|
|
/* #define MTH_PARANOID */ /* Uncomment this to armor code */
|
|
|
|
/* NOT for U64 */
|
|
#ifndef U64
|
|
|
|
|
|
/* ##-###########################
|
|
## Set Pentium FPU Precision
|
|
############################## */
|
|
|
|
/* ##F===================================================================================
|
|
NAME : MTH_fn_vSet24bitFPU
|
|
DESCRIPTION : Set Pentium FPU internal precision to 24bit
|
|
INPUT : void
|
|
OUTPUT : void
|
|
=======================================================================================*/
|
|
INLINE void MTH_CALL MTH_fn_vSet24bitFPU(void)
|
|
{
|
|
long memvar;
|
|
|
|
_asm{
|
|
#ifdef MTH_PARANOID
|
|
push eax
|
|
#endif /* MTH_PARANOID */
|
|
|
|
finit
|
|
fwait
|
|
fstcw [memvar]
|
|
fwait
|
|
mov eax, [memvar]
|
|
and eax, 0fffffcffh
|
|
mov [memvar], eax
|
|
fldcw [memvar]
|
|
fwait
|
|
|
|
#ifdef MTH_PARANOID
|
|
pop eax
|
|
#endif /* MTH_PARANOID */
|
|
}
|
|
}
|
|
|
|
/* ##F===================================================================================
|
|
NAME : MTH_fn_vSet53bitFPU
|
|
DESCRIPTION : Set Pentium FPU internal precision to 53bit
|
|
INPUT : void
|
|
OUTPUT : void
|
|
=======================================================================================*/
|
|
INLINE void MTH_CALL MTH_fn_vSet53bitFPU(void)
|
|
{
|
|
long memvar;
|
|
|
|
_asm{
|
|
#ifdef MTH_PARANOID
|
|
push eax
|
|
#endif /* MTH_PARANOID */
|
|
finit
|
|
fwait
|
|
fstcw [memvar]
|
|
fwait
|
|
mov eax, [memvar]
|
|
and eax, 0fffffeffh
|
|
mov [memvar], eax
|
|
fldcw [memvar]
|
|
fwait
|
|
|
|
#ifdef MTH_PARANOID
|
|
pop eax
|
|
#endif /* MTH_PARANOID */
|
|
}
|
|
}
|
|
|
|
/* ##F===================================================================================
|
|
NAME : MTH_fn_vSet64bitFPU
|
|
DESCRIPTION : Set Pentium FPU internal precision to 64bit
|
|
INPUT : void
|
|
OUTPUT : void
|
|
=======================================================================================*/
|
|
INLINE void MTH_CALL MTH_fn_vSet64bitFPU(void)
|
|
{
|
|
long memvar;
|
|
|
|
_asm{
|
|
#ifdef MTH_PARANOID
|
|
push eax
|
|
#endif /* MTH_PARANOID */
|
|
|
|
finit
|
|
fwait
|
|
fstcw [memvar]
|
|
fwait
|
|
mov eax, [memvar]
|
|
and eax, 0ffffffffh
|
|
mov [memvar], eax
|
|
fldcw [memvar]
|
|
fwait
|
|
#ifdef MTH_PARANOID
|
|
pop eax
|
|
#endif /* MTH_PARANOID */
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ##-###########################
|
|
## FloatToLong
|
|
############################## */
|
|
|
|
/* WARNING : These functions are experimental and for PC Only.
|
|
Try it, measure performance and degradation before making your choice.
|
|
Both C and ASM version are avaible, C version is faster than ASM version
|
|
for Visual C, because compiler can optimise with surround code.
|
|
*/
|
|
|
|
|
|
extern CPA_EXPORT double MTH_gs_dDecal; /* = 3.0F*pow(2, 51); */
|
|
|
|
/* ##F===================================================================================
|
|
NAME : MTH_fn_fFloat2LongRoundOptASM
|
|
DESCRIPTION : Return long round of a real number : asm version
|
|
Do not use this function ! MTH_fn_fFloat2LongRoundOpt is more efficient.
|
|
INPUT : a : float
|
|
OUTPUT : round(a) : long
|
|
=======================================================================================*/
|
|
INLINE long MTH_fn_fFloat2LongRoundOptASM (float f)
|
|
{
|
|
long res;
|
|
|
|
_asm{
|
|
fld dword ptr f
|
|
fadd qword ptr MTH_gs_dDecal
|
|
fstp qword ptr res
|
|
}
|
|
|
|
return (res);
|
|
}
|
|
|
|
/* ##F===================================================================================
|
|
NAME : MTH_fn_fFloat2LongRoundOpt
|
|
DESCRIPTION : Return long round of a real number : C version (more optimized than asm)
|
|
Exemples : 7.7 return 8, -7.7 return -8
|
|
INPUT : a : float
|
|
OUTPUT : round(a) : long
|
|
=======================================================================================*/
|
|
INLINE long MTH_fn_fFloat2LongRoundOpt (double f)
|
|
{
|
|
long res;
|
|
|
|
MTH_M_vCHK(f);
|
|
|
|
f+= MTH_gs_dDecal;
|
|
res= *(long *)&f;
|
|
|
|
return (res);
|
|
}
|
|
|
|
/* ##F===================================================================================
|
|
NAME : MTH_fn_fFloat2LongOptASM
|
|
DESCRIPTION : Return long truncation of a real number : asm version
|
|
Do not use this function ! MTH_fn_fFloat2LongOpt is more efficient.
|
|
INPUT : a : float
|
|
OUTPUT : (long)(a) : long
|
|
=======================================================================================*/
|
|
INLINE long MTH_fn_fFloat2LongOptASM (float f)
|
|
{
|
|
long res;
|
|
double m=0.49999999;
|
|
|
|
_asm{
|
|
mov ebx, f
|
|
test ebx, 0x80000000
|
|
jne F2L_negativ
|
|
fld dword ptr f
|
|
fsub qword ptr m
|
|
fadd qword ptr MTH_gs_dDecal
|
|
fstp qword ptr res
|
|
jmp F2L_fin
|
|
|
|
F2L_negativ:
|
|
and ebx, 0x7FFFFFFF
|
|
mov f, ebx
|
|
fld dword ptr f
|
|
fsub qword ptr m
|
|
fadd qword ptr MTH_gs_dDecal
|
|
xor ebx, ebx
|
|
fstp qword ptr res
|
|
sub ebx, res
|
|
mov res, ebx
|
|
F2L_fin :
|
|
}
|
|
|
|
return (res);
|
|
}
|
|
|
|
/* ##F===================================================================================
|
|
NAME : MTH_fn_fFloat2LongRoundOpt
|
|
DESCRIPTION : Return long truncation of a real number : C version (more optimized than asm)
|
|
Examples : 7.7 return 7, -7.7 return -7
|
|
Warning : Precision of 0.000001
|
|
INPUT : a : float
|
|
OUTPUT : (long)(a) : long
|
|
=======================================================================================*/
|
|
INLINE long MTH_fn_fFloat2LongOpt (double f)
|
|
{
|
|
long res;
|
|
static double m=0.499999; /* Do not add `9` unless you decrease outpout domaine */
|
|
|
|
MTH_M_vCHK(f);
|
|
|
|
if( f>0)
|
|
{
|
|
f-= m;
|
|
f+= MTH_gs_dDecal;
|
|
res= *(long *)&f;
|
|
return (res);
|
|
}
|
|
else
|
|
{
|
|
f= -f;
|
|
f-= m;
|
|
f+= MTH_gs_dDecal;
|
|
res= *(long *)&f;
|
|
return (-res);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ##-###########################
|
|
## TABLE BASED OPTIMIZATION
|
|
############################## */
|
|
|
|
/* WARNING : These functions are experimental and for PC Only.
|
|
Performance depend of context !
|
|
Best performance for serial calculous (less cache miss).
|
|
Try it, measure performance and degradation before making your choice.
|
|
*/
|
|
|
|
extern CPA_EXPORT unsigned long MTH_g_a2048_fSquareRootTable[1024*2];
|
|
|
|
/* ##F===================================================================================
|
|
NAME : MTH_fn_fSquareRootOpt
|
|
DESCRIPTION : Return square root of a real number : optimized with table.
|
|
INPUT : a : float
|
|
OUTPUT : sqrt(a) : float
|
|
=======================================================================================*/
|
|
INLINE float MTH_CALL MTH_fn_fSquareRootOpt(float f)
|
|
{
|
|
float res_sqrt;
|
|
|
|
MTH_M_vCHK(f);
|
|
|
|
/* f= (-1)^s.2^E.[1.M] */
|
|
_asm{
|
|
#ifdef MTH_PARANOID
|
|
push ebx
|
|
push eax
|
|
#endif /* MTH_PARANOID */
|
|
|
|
mov ebx,f /* ebx = f */
|
|
mov eax,f /* eax = f */
|
|
and ebx,0x7F800000 /* ebx = E */
|
|
|
|
and eax,0x00FFE000 /* eax = 1st bit of E & M */
|
|
|
|
add ebx,0x3F800000 /* ebx= E + (127<<23) */
|
|
shr ebx,1 /* ebx = ebx/2 */
|
|
|
|
shr eax,11 /* eax = index on table */
|
|
|
|
and ebx,0x7F800000 /* ebx = new E */
|
|
add ebx,dword ptr[MTH_g_a2048_fSquareRootTable+eax] /* Get from table */
|
|
mov dword ptr[res_sqrt],ebx
|
|
|
|
#ifdef MTH_PARANOID
|
|
pop eax
|
|
pop ebx
|
|
#endif /* MTH_PARANOID */
|
|
}
|
|
|
|
MTH_M_vCHK(res_sqrt);
|
|
|
|
return (res_sqrt);
|
|
}
|
|
|
|
|
|
extern CPA_EXPORT unsigned long MTH_g_a1024_fInverse[1024];
|
|
|
|
/* ##F===================================================================================
|
|
NAME : MTH_fn_fInverseOpt
|
|
DESCRIPTION : Return inverse of a real number : optimized with table.
|
|
INPUT : a : float
|
|
OUTPUT : 1/a : float
|
|
=======================================================================================*/
|
|
INLINE float MTH_CALL MTH_fn_fInverseOpt(float f)
|
|
{
|
|
float res_inv;
|
|
|
|
MTH_M_vCHK(f);
|
|
|
|
_asm{
|
|
#ifdef MTH_PARANOID
|
|
push ecx
|
|
push ebx
|
|
push eax
|
|
#endif /* MTH_PARANOID */
|
|
|
|
mov ebx,f
|
|
mov ecx,0x7E800000 /* 1 Clocks */
|
|
mov eax,ebx
|
|
and eax,0x007FE000 /* 1 Clocks */
|
|
and ebx,0xFF800000
|
|
shr eax,11 /* 1 Clocks */
|
|
sub ecx,ebx
|
|
add ecx,dword ptr[MTH_g_a1024_fInverse + eax] /* 3 Clocks Exp_AGI_U_Pem:1 */
|
|
mov dword ptr[res_inv],ecx /* 1 Clocks Exp_Flow_Dep_ecx */
|
|
|
|
#ifdef MTH_PARANOID
|
|
pop eax
|
|
pop ebx
|
|
pop ecx
|
|
#endif /* MTH_PARANOID */
|
|
}
|
|
|
|
MTH_M_vCHK(res_inv);
|
|
|
|
return (res_inv);
|
|
}
|
|
|
|
|
|
extern CPA_EXPORT unsigned long MTH_g_a2048_fInvSquareRootTable[1024*2];
|
|
|
|
/* ##F===================================================================================
|
|
NAME : MTH_fn_fInvSquareRootOpt
|
|
DESCRIPTION : Return inverse square root of a real number : optimized with table.
|
|
INPUT : a : float
|
|
OUTPUT : 1/sqrt(a) : float
|
|
=======================================================================================*/
|
|
INLINE float MTH_CALL MTH_fn_fInvSquareRootOpt(float f)
|
|
{
|
|
float res_invsqrt;
|
|
/* To test vality of this function :
|
|
float res_high;
|
|
char c_test[30];
|
|
*/
|
|
|
|
MTH_M_vCHK(f);
|
|
|
|
/* f= (-1)^s.2^E.[1.M] */
|
|
_asm{
|
|
#ifdef MTH_PARANOID
|
|
push ecx
|
|
push ebx
|
|
push eax
|
|
#endif /* MTH_PARANOID */
|
|
|
|
mov ecx,f /* ecx = f */
|
|
mov eax,f /* eax = f, to allow pairing */
|
|
and ecx,0x7F800000 /* ecx = E */
|
|
|
|
mov ebx,0xBD800000 /* ebx= 379 << 23 */
|
|
|
|
and eax,0x00FFE000 /* 1st bit of E (odd/even) & 10 high of M */
|
|
|
|
sub ebx,ecx /* ebx= (379 << 23) -E */
|
|
shr ebx,1 /* ebx= ebx/2 */
|
|
|
|
shr eax,11 /* eax = index on table */
|
|
and ebx,0x7F800000 /* ebx = new E */
|
|
add ebx,dword ptr[MTH_g_a2048_fInvSquareRootTable + eax] /* Get from table */
|
|
mov dword ptr[res_invsqrt],ebx
|
|
#ifdef MTH_PARANOID
|
|
pop eax
|
|
pop ebx
|
|
pop ecx
|
|
#endif /* MTH_PARANOID */
|
|
}
|
|
/*
|
|
res_high= 1.0F/sqrt((float)f);
|
|
assert( abs(1.0F-res_high/res_invsqrt) <0.001 );
|
|
*/
|
|
|
|
MTH_M_vCHK(res_invsqrt);
|
|
|
|
return (res_invsqrt);
|
|
}
|
|
|
|
|
|
|
|
/* ##F===================================================================================
|
|
NAME : MTH_fn_vInit
|
|
DESCRIPTION : Initialize tables
|
|
INPUT : void
|
|
OUTPUT : void
|
|
=======================================================================================*/
|
|
/* Table initialisation entry point, implemented outside this header.
   On PC builds it is reached through the MTH_M_vInit() macro. */
extern CPA_EXPORT void MTH_fn_vInit( void );
|
|
|
|
|
|
/* ======================================================================================
NAME :        MTH_M_bEqualZeroAsm
DESCRIPTION : Tests if a float number equals zero.
              The test is done on the bit pattern : the sign bit is cleared (negative
              zero also counts as zero) and the remaining 31 bits are compared to 0.
              Visual C / x86 : hand-written asm, result left in AL (hence the disabled
              "no return value" warning 4035).
              Any other compiler or target : equivalent portable C on the same bits.
INPUT :       A : float
OUTPUT :      A==0 : unsigned char (1 if zero, 0 otherwise)
Author:       Yann Le Tensorer sept 25,1998
Cycles:       4 on Pentium, 3 on Pentium II (asm version)
Note:         Faster than the "normal" way for both pentium & Pentium II
              return type is unsigned char, so it is tested by visual C with "test al,al"
=======================================================================================*/
#pragma warning( disable : 4035 )

#if defined(_MSC_VER) && defined(_M_IX86)

static __inline unsigned char __fastcall MTH_M_bEqualZeroAsm( float A)
{
    __asm
    {
        mov     ecx,A
        mov     al,0                ;default return value is 0 (not equal to 0)
        and     ecx,0x7FFFFFFF      ;clear the sign bit : negative zero also compares to zero
        cmp     ecx,1               ;carry is set only if ecx=0 (ecx strictly smaller than 1)
        adc     al,0                ;add the carry to al
        ;no explicit return : the returned value is in al
    }
}

#else /* portable version */

static __inline unsigned char MTH_M_bEqualZeroAsm( float A)
{
    union
    {
        float        fValue;
        unsigned int uiBits;    /* raw 32-bit pattern of fValue */
    } uRaw;

    uRaw.fValue = A;

    /* sign bit ignored : +0 and -0 are both zero */
    return (unsigned char) ( (uRaw.uiBits & 0x7FFFFFFFu) == 0 );
}

#endif /* Visual C x86 OR portable */
|
|
|
|
/* ======================================================================================
NAME        : MTH_M_bDifferentZeroAsm
DESCRIPTION : Tests if a float number is different from zero
INPUT       : A : float
OUTPUT      : A!=0 : unsigned char (0 if A is +0.0 or -0.0, 1 otherwise)
Author      : Yann Le Tensorer sept 25,1998
Cycles      : 4 on Pentium, 3 on Pentium II (asm version)
Note        : Faster than the "normal" way for both Pentium & Pentium II.
              Return type is unsigned char, so it is tested by Visual C with
              "test al,al".
              The test is done on the raw 32-bit pattern of A : the sign bit is
              cleared and the result is 1 when any of the other 31 bits is set.
              The inline assembly version is only built by 32-bit x86 Microsoft
              compilers ; every other compiler gets a C version performing the
              same bit test (it assumes float and unsigned int are both 32 bits
              wide).
=======================================================================================*/
#if defined(_MSC_VER) && defined(_M_IX86)

static __inline unsigned char __fastcall MTH_M_bDifferentZeroAsm( float A)
{
    __asm
    {
        mov ecx,A
        mov al,1                /* default return value is 1 (different from 0) */
        and ecx,0x7FFFFFFF      /* very important: clear sign bit because negative zero also compares to zero */
        cmp ecx,1               /* cf is set only if ecx=0 (cf is set if ecx is strictly smaller than 1) */
        sbb al,0                /* subtract carry from al : al becomes 0 only when A is zero */
                                /* no need of return, the returned value is in al */
    }
}

#else

static __inline unsigned char MTH_M_bDifferentZeroAsm( float A)
{
    /* Reinterpret the float as its 32-bit pattern, as the asm version does. */
    union { float f; unsigned int u; } bits;

    bits.f = A;
    /* Ignore the sign bit so that -0.0 is reported as zero too. */
    return (unsigned char)((bits.u & 0x7FFFFFFFu) != 0u);
}

#endif

/* ======================================================================================
NAME        : MTH_M_bGreaterZeroAsm
DESCRIPTION : Tests if a float number is strictly greater than zero
INPUT       : A : float
OUTPUT      : A>0 : unsigned char (1 if A is strictly positive, 0 otherwise)
Author      : Yann Le Tensorer sept 25,1998
Cycles      : 3 on Pentium, 2 on Pentium II (asm version)
Note        : Faster than the "normal" way for both Pentium & Pentium II.
              The test is done on the raw 32-bit pattern of A : the result is 1
              when the pattern lies in 0x00000001..0x7FFFFFFF. Both +0.0 and
              -0.0 give 0. Every pattern with a clear sign bit and a non zero
              magnitude gives 1, whatever it encodes.
              The inline assembly version is only built by 32-bit x86 Microsoft
              compilers ; every other compiler gets a C version performing the
              same bit test (it assumes float and unsigned int are both 32 bits
              wide).
=======================================================================================*/
#if defined(_MSC_VER) && defined(_M_IX86)

static __inline unsigned char __fastcall MTH_M_bGreaterZeroAsm( float A)
{
    __asm
    {
        mov ecx,A
        mov al,0                /* default is less or equal zero */
        sub ecx,1               /* +0.0 wraps to 0xFFFFFFFF, -0.0 becomes 0x7FFFFFFF */
        cmp ecx,0x7fffffff      /* key of the algorithm: cf is set if (A-1)<0x7fffffff (unsigned) */
        adc al,0                /* add cf to al */
    }
}

#else

static __inline unsigned char MTH_M_bGreaterZeroAsm( float A)
{
    /* Reinterpret the float as its 32-bit pattern, as the asm version does. */
    union { float f; unsigned int u; } bits;

    bits.f = A;
    /* Unsigned wrap-around maps +0.0 to 0xFFFFFFFF and -0.0 to 0x7FFFFFFF,
       so only patterns 0x00000001..0x7FFFFFFF pass the comparison. */
    return (unsigned char)(((bits.u - 1u) & 0xFFFFFFFFu) < 0x7FFFFFFFu);
}

#endif

/* ======================================================================================
NAME        : MTH_M_bLessEqualZeroAsm
DESCRIPTION : Tests if a float number is less than or equal to zero
INPUT       : A : float
OUTPUT      : A<=0 : unsigned char (1 if A is negative or zero, 0 otherwise)
Author      : Yann Le Tensorer sept 25,1998
Cycles      : 4 on Pentium, 3 on Pentium II (asm version)
Note        : Faster than the "normal" way for both Pentium & Pentium II.
              The test is done on the raw 32-bit pattern of A : the result is 0
              when the pattern lies in 0x00000001..0x7FFFFFFF and 1 otherwise,
              so both +0.0 and -0.0 give 1.
              The inline assembly version is only built by 32-bit x86 Microsoft
              compilers ; every other compiler gets a C version performing the
              same bit test (it assumes float and unsigned int are both 32 bits
              wide).
=======================================================================================*/
#if defined(_MSC_VER) && defined(_M_IX86)

static __inline unsigned char __fastcall MTH_M_bLessEqualZeroAsm( float A)
{
    __asm
    {
        mov ecx,A
        mov al,1                /* default is less or equal zero */
        sub ecx,1               /* +0.0 wraps to 0xFFFFFFFF, -0.0 becomes 0x7FFFFFFF */
        cmp ecx,0x7fffffff      /* key of the algorithm: cf is set if (A-1)<0x7fffffff (unsigned) */
        sbb al,0                /* subtract cf from al : al becomes 0 when A is strictly positive */
    }
}

#else

static __inline unsigned char MTH_M_bLessEqualZeroAsm( float A)
{
    /* Reinterpret the float as its 32-bit pattern, as the asm version does. */
    union { float f; unsigned int u; } bits;

    bits.f = A;
    /* Unsigned wrap-around maps +0.0 to 0xFFFFFFFF and -0.0 to 0x7FFFFFFF,
       so only patterns 0x00000001..0x7FFFFFFF fail the comparison. */
    return (unsigned char)(((bits.u - 1u) & 0xFFFFFFFFu) >= 0x7FFFFFFFu);
}

#endif

/* ======================================================================================
NAME        : MTH_M_bLessZeroAsm
DESCRIPTION : Tests if a float number is strictly less than zero
INPUT       : A : float
OUTPUT      : A<0 : unsigned char (1 if A is strictly negative, 0 otherwise)
Author      : Yann Le Tensorer sept 25,1998
Cycles      : 2 on Pentium, 2 on Pentium II (asm version)
Note        : Faster than the "normal" way for both Pentium & Pentium II.
              The test is done on the raw 32-bit pattern of A : the result is 1
              when the pattern is 0x80000001 or above (sign bit set and non
              zero magnitude), so -0.0 (0x80000000) gives 0.
              The inline assembly version is only built by 32-bit x86 Microsoft
              compilers ; every other compiler gets a C version performing the
              same bit test (it assumes float and unsigned int are both 32 bits
              wide).
=======================================================================================*/
#if defined(_MSC_VER) && defined(_M_IX86)

static __inline unsigned char __fastcall MTH_M_bLessZeroAsm( float A)
{
    __asm
    {
        mov al,1                /* default is strictly less than zero */
        cmp A,0x80000001        /* cf is set if A<0x80000001 (unsigned), i.e. A is positive or a zero */
        sbb al,0                /* subtract cf from al */
    }
}

#else

static __inline unsigned char MTH_M_bLessZeroAsm( float A)
{
    /* Reinterpret the float as its 32-bit pattern, as the asm version does. */
    union { float f; unsigned int u; } bits;

    bits.f = A;
    /* Strictly above the pattern of -0.0 : sign bit set, magnitude not zero. */
    return (unsigned char)(bits.u >= 0x80000001u);
}

#endif

/* ======================================================================================
NAME        : MTH_M_bGreaterEqualZeroAsm
DESCRIPTION : Tests if a float number is greater than or equal to zero
INPUT       : A : float
OUTPUT      : A>=0 : unsigned char (1 if A is positive or zero, 0 otherwise)
Author      : Yann Le Tensorer sept 25,1998
Cycles      : 2 on Pentium, 2 on Pentium II (asm version)
Note        : Faster than the "normal" way for both Pentium & Pentium II.
              The test is done on the raw 32-bit pattern of A : the result is 1
              when the pattern is 0x80000000 or below, so both +0.0 and -0.0
              give 1.
              The inline assembly version is only built by 32-bit x86 Microsoft
              compilers ; every other compiler gets a C version performing the
              same bit test (it assumes float and unsigned int are both 32 bits
              wide).
=======================================================================================*/
#if defined(_MSC_VER) && defined(_M_IX86)

static __inline unsigned char __fastcall MTH_M_bGreaterEqualZeroAsm( float A)
{
    __asm
    {
        mov al,0                /* default is strictly less than zero */
        cmp A,0x80000001        /* cf is set if A<0x80000001 (unsigned), i.e. A is positive or a zero */
        adc al,0                /* add cf to al */
    }
}

#else

static __inline unsigned char MTH_M_bGreaterEqualZeroAsm( float A)
{
    /* Reinterpret the float as its 32-bit pattern, as the asm version does. */
    union { float f; unsigned int u; } bits;

    bits.f = A;
    /* Up to and including the pattern of -0.0 : sign bit clear, or a negative zero. */
    return (unsigned char)(bits.u < 0x80000001u);
}

#endif

#endif /* NOT U64 */
|
|
|
|
#if defined(__cplusplus)
|
|
}
|
|
#endif
|
|
|
|
#endif /* MTH_FOPT_H */
|
|
|
|
|