/* ##H_FILE#
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
FILE :          MTH_fopt.h
MODULE :        MTH     (Common Mathematic Library) 
 
DESCRIPTION : Optimization for PC and float
 
VERSION :       MTH V5.0.13 / Alexandre LANGER [ALX] Ubi R&D / Add Comments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
 
 
/* ##INCLUDE#----------------------------------------------------------------------------
                          Includes Files
---------------------------------------------------------------------------------------*/
 
#include <assert.h>
#ifndef MTH_FOPT_H
#define MTH_FOPT_H
 
#include "acp_base.h"
 
#if defined(__cplusplus)
    extern "C"
    {
#endif
 
/* For DLL : */
#include "cpa_expt.h" 
 
 
/* ##-############################### 
   ## Compiler MTH Directive Summary
   ################################## */
 
/*
 Enable optimization :
  MTH_OPTIMIZE  : Allow MTH optimization
 
 Enable dynamic mathematic check :
  MTH_CHECK     : Checking In and Out mathematics validity  
 
 Levels of precisions/optimisations :
  MTH_LOW       : Faster
  MTH_MEDIUM    : Some optimisation
  MTH_HIGH      : Slow : no optimisation
 
 Targets :
  MTH_PC_DEV            : PC machine
  MTH_U64_DEV           : Nintendo 64
  MTH_UNKOWN_DEV        : Unknown machine
 
 Type of Real :
  MTH_RealIsFloat       : float
  MTH_RealIsDouble      : double
  MTH_RealIsFixed16_16  : Fixed 16-16 (Not Yet Implemented)
 
 Compilers :
  VISUAL        : Visual C
  WATCOM        : Watcom C
*/
  
 
 
 
 
/* ##MACRO#----------------------------------------------------------------------------
                          MACRO definition
---------------------------------------------------------------------------------------*/
 
/* ##-####################################### 
   ## Link Macros to their Optimized versions
   ########################################## */
 
/* ****************** */
/* *** MTH_M_xDiv *** */
/* ****************** */
/* The "Quick" division is always bound to the Low implementation. */
#define MTH_M_xQuickDiv MTH_M_xDivLow

/* The "Opt" division follows the precision level chosen by the build
   (MTH_LOW, MTH_MEDIUM or MTH_HIGH).  Each level is tested on its own,
   so MTH_M_xDivOpt stays undefined when no level is selected. */
#if defined(MTH_LOW)
#define MTH_M_xDivOpt MTH_M_xDivLow
#endif
#if defined(MTH_MEDIUM)
#define MTH_M_xDivOpt MTH_M_xDivMedium
#endif
#if defined(MTH_HIGH)
#define MTH_M_xDivOpt MTH_M_xDivHigh
#endif
 
/* ****************** */
/* *** MTH_M_xSqrt ** */
/* ****************** */
/* The "Quick" square root is always bound to the Low implementation. */
#define MTH_M_xQuickSqrt MTH_M_xSqrtLow

/* The "Opt" square root follows the precision level chosen by the build
   (MTH_LOW, MTH_MEDIUM or MTH_HIGH).  Each level is tested on its own,
   so MTH_M_xSqrtOpt stays undefined when no level is selected. */
#if defined(MTH_LOW)
#define MTH_M_xSqrtOpt MTH_M_xSqrtLow
#endif
#if defined(MTH_MEDIUM)
#define MTH_M_xSqrtOpt MTH_M_xSqrtMedium
#endif
#if defined(MTH_HIGH)
#define MTH_M_xSqrtOpt MTH_M_xSqrtHigh
#endif
 
/* ********************* */
/* *** MTH_M_xInvSqrt ** */
/* ********************* */
/* The "Quick" inverse square root is always bound to the Low implementation. */
#define MTH_M_xQuickInvSqrt MTH_M_xInvSqrtLow

/* The "Opt" inverse square root follows the precision level chosen by the
   build (MTH_LOW, MTH_MEDIUM or MTH_HIGH).  Each level is tested on its own,
   so MTH_M_xInvSqrtOpt stays undefined when no level is selected. */
#if defined(MTH_LOW)
#define MTH_M_xInvSqrtOpt MTH_M_xInvSqrtLow
#endif
#if defined(MTH_MEDIUM)
#define MTH_M_xInvSqrtOpt MTH_M_xInvSqrtMedium
#endif
#if defined(MTH_HIGH)
#define MTH_M_xInvSqrtOpt MTH_M_xInvSqrtHigh
#endif
 
/* ***************** */
/* *** MTH_M_xInv ** */
/* ***************** */
/* The "Quick" inverse is always bound to the Low implementation. */
#define MTH_M_xQuickInv MTH_M_xInvLow

/* The "Opt" inverse follows the precision level chosen by the build
   (MTH_LOW, MTH_MEDIUM or MTH_HIGH).  Each level is tested on its own,
   so MTH_M_xInvOpt stays undefined when no level is selected. */
#if defined(MTH_LOW)
#define MTH_M_xInvOpt MTH_M_xInvLow
#endif
#if defined(MTH_MEDIUM)
#define MTH_M_xInvOpt MTH_M_xInvMedium
#endif
#if defined(MTH_HIGH)
#define MTH_M_xInvOpt MTH_M_xInvHigh
#endif
 
/* ***************************** */
/* *** MTH_M_xRealToLongRound ** */
/* ***************************** */
/* The "Opt" real-to-long rounding follows the precision level chosen by the
   build (MTH_LOW, MTH_MEDIUM or MTH_HIGH).  Each level is tested on its own,
   so MTH_M_xRealToLongRoundOpt stays undefined when no level is selected. */
#if defined(MTH_LOW)
#define MTH_M_xRealToLongRoundOpt MTH_M_xRealToLongRoundLow
#endif
#if defined(MTH_MEDIUM)
#define MTH_M_xRealToLongRoundOpt MTH_M_xRealToLongRoundMedium
#endif
#if defined(MTH_HIGH)
#define MTH_M_xRealToLongRoundOpt MTH_M_xRealToLongRoundHigh
#endif
 
/* ************************ */
/* *** MTH_M_xRealToLong ** */
/* ************************ */
/* The "Opt" real-to-long cast follows the precision level chosen by the
   build (MTH_LOW, MTH_MEDIUM or MTH_HIGH).  Each level is tested on its own,
   so MTH_M_xRealToLongOpt stays undefined when no level is selected. */
#if defined(MTH_LOW)
#define MTH_M_xRealToLongOpt MTH_M_xRealToLongLow
#endif
#if defined(MTH_MEDIUM)
#define MTH_M_xRealToLongOpt MTH_M_xRealToLongMedium
#endif
#if defined(MTH_HIGH)
#define MTH_M_xRealToLongOpt MTH_M_xRealToLongHigh
#endif
 
 
 
 
 
 
/* ##-############################# 
   ## Optimization implementations
   ################################ */
 
 
/* ##-############################# 
   ## RealToLongRound
   ################################ */
 
/* ##M==================================================================================
NAME :          MTH_M_xRealToLongRoundLow
DESCRIPTION :   Return long round of a real number : Low precision.
                Plain alias of the function MTH_fn_fFloat2LongRoundOpt.
INPUT :         a  : MTH_tdxReal
OUTPUT :        round(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongRoundLow       MTH_fn_fFloat2LongRoundOpt

/* ##M==================================================================================
NAME :          MTH_M_xRealToLongRoundMedium
DESCRIPTION :   Return long round of a real number : Medium precision.
                Computed as trunc(2*a) - trunc(a), which rounds halves away from
                zero (7.5 -> 8, -7.5 -> -8).
                The argument is parenthesised so that expressions such as
                (x + y) are rounded as a whole.
                WARNING : the argument is evaluated twice; do not pass an
                expression with side effects.
INPUT :         a  : MTH_tdxReal
OUTPUT :        round(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongRoundMedium(A) ( (long) ( (long)((A)*2.0F)-(long)(A) ) )

/* ##M==================================================================================
NAME :          MTH_M_xRealToLongRoundHigh
DESCRIPTION :   Return long round of a real number : High precision.
                Same formula as the Medium version : trunc(2*a) - trunc(a).
                WARNING : the argument is evaluated twice; do not pass an
                expression with side effects.
INPUT :         a  : MTH_tdxReal
OUTPUT :        round(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongRoundHigh(A)   ( (long) ( (long)((A)*2.0F)-(long)(A) ) )
 
 
 
 
 
 
/* ##-############################# 
   ##  RealToLong
   ################################ */
 
/* ##M==================================================================================
NAME :          MTH_M_xRealToLongLow
DESCRIPTION :   Convert a real number to long : Low precision.
                Plain alias of the function MTH_fn_fFloat2LongOpt.
INPUT :         a  : MTH_tdxReal
OUTPUT :        (long)(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongLow            MTH_fn_fFloat2LongOpt

/* ##M==================================================================================
NAME :          MTH_M_xRealToLongMedium
DESCRIPTION :   Convert a real number to long : Medium precision.
                Ordinary C cast, i.e. truncation toward zero (7.7 -> 7, -7.7 -> -7).
INPUT :         a  : MTH_tdxReal
OUTPUT :        (long)(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongMedium(X)      ((long)(X))

/* ##M==================================================================================
NAME :          MTH_M_xRealToLongHigh
DESCRIPTION :   Convert a real number to long : High precision.
                Ordinary C cast, identical to the Medium version.
INPUT :         a  : MTH_tdxReal
OUTPUT :        (long)(a) : long
=======================================================================================*/
#define MTH_M_xRealToLongHigh(X)        ((long)(X))
 
 
 
 
 
 
 
/* ##-############################# 
   ##  SQRT
   ################################ */
 
/* ** U64 : */
#if defined(U64)

/* ---- U64 target : all three precision levels use sqrtf() directly ---- */

/* ##M MTH_M_xSqrtLow( A )    : sqrt(A) as MTH_tdxReal, Low precision / U64 */
#define MTH_M_xSqrtLow( A )     ( sqrtf( A ) )

/* ##M MTH_M_xSqrtMedium( A ) : sqrt(A) as MTH_tdxReal, Medium precision / U64 */
#define MTH_M_xSqrtMedium( A )  ( sqrtf( A ) )

/* ##M MTH_M_xSqrtHigh( A )   : sqrt(A) as MTH_tdxReal, High precision / U64 */
#define MTH_M_xSqrtHigh( A )    ( sqrtf( A ) )

#else /* NOT U64 => PC */

/* ---- PC target ---- */

/* ##M MTH_M_xSqrtLow( A ) : sqrt(A) as MTH_tdxReal, Low precision / PC.
   Delegates to the table-based MTH_fn_fSquareRootOpt(). */
#define MTH_M_xSqrtLow( A )     ( MTH_fn_fSquareRootOpt( A ) )

/* ##M MTH_M_xSqrtMedium( A ) / MTH_M_xSqrtHigh( A ) :
   sqrt(A) as MTH_tdxReal, Medium and High precision / PC.
   Both levels share one implementation : the checked function
   MTH_fn_xSqrtCHK() when MTH_CHECK is defined, otherwise the double
   precision sqrt() with the result converted back to float. */
#if defined(MTH_CHECK)
#define MTH_M_xSqrtMedium( A )  MTH_fn_xSqrtCHK( A )
#define MTH_M_xSqrtHigh( A )    MTH_fn_xSqrtCHK( A )
#else /* NOT MTH_CHECK */
#define MTH_M_xSqrtMedium( A )  ( (float) sqrt((double) (A)) )
#define MTH_M_xSqrtHigh( A )    ( (float) sqrt((double) (A)) )
#endif /* MTH_CHECK OR NOT */

#endif /* U64 OR PC */
 
 
 
 
 
 
     
 
/* ##-############################# 
   ##  INV
   ################################ */
 
/* ** U64 : */
#if defined(U64)

/* ---- U64 target : all three precision levels are a plain division ---- */

/* ##M MTH_M_xInvLow( A )    : 1/A as MTH_tdxReal, Low precision / U64 */
#define MTH_M_xInvLow( A )      (  MTH_C_ONE / (A) )

/* ##M MTH_M_xInvMedium( A ) : 1/A as MTH_tdxReal, Medium precision / U64 */
#define MTH_M_xInvMedium( A )   (  MTH_C_ONE / (A) )

/* ##M MTH_M_xInvHigh( A )   : 1/A as MTH_tdxReal, High precision / U64 */
#define MTH_M_xInvHigh( A )     (  MTH_C_ONE / (A) )

#else /* NOT U64 => PC */

/* ---- PC target ---- */

/* ##M MTH_M_xInvLow( A ) : 1/A as MTH_tdxReal, Low precision / PC.
   Delegates to the table-based MTH_fn_fInverseOpt(). */
#define MTH_M_xInvLow( A )      ( MTH_fn_fInverseOpt( A ) )

/* ##M MTH_M_xInvMedium( A ) / MTH_M_xInvHigh( A ) :
   1/A as MTH_tdxReal, Medium and High precision / PC.
   Both levels share one implementation : the checked function
   MTH_fn_xInvCHK() when MTH_CHECK is defined, otherwise a plain
   division of MTH_C_ONE by the argument. */
#if defined(MTH_CHECK)
#define MTH_M_xInvMedium( A )   MTH_fn_xInvCHK( A )
#define MTH_M_xInvHigh( A )     MTH_fn_xInvCHK( A )
#else /* NOT MTH_CHECK */
#define MTH_M_xInvMedium( A )   (  MTH_C_ONE / (A) )
#define MTH_M_xInvHigh( A )     (  MTH_C_ONE / (A) )
#endif /* MTH_CHECK OR NOT */

#endif /* U64 OR PC */
 
 
 
 
 
 
 
     
 
/* ##-############################# 
   ##  DIV
   ################################ */
 
/* ** U64 : */
#if defined(U64)

/* ---- U64 target : all three precision levels are a plain division ---- */

/* ##M MTH_M_xDivLow( A, B )    : A/B as MTH_tdxReal, Low precision / U64 */
#define MTH_M_xDivLow( A, B)            ((A) / (B))

/* ##M MTH_M_xDivMedium( A, B ) : A/B as MTH_tdxReal, Medium precision / U64 */
#define MTH_M_xDivMedium( A, B)         ((A) / (B))

/* ##M MTH_M_xDivHigh( A, B )   : A/B as MTH_tdxReal, High precision / U64 */
#define MTH_M_xDivHigh( A, B)           ((A) / (B))

#else /* NOT U64 => PC */

/* ---- PC target ---- */

/* ##M MTH_M_xDivLow( A, B ) : A/B as MTH_tdxReal, Low precision / PC.
   Multiplies A by the table-based inverse of B (MTH_fn_fInverseOpt). */
#define MTH_M_xDivLow( A, B )   ( MTH_M_xMul( (A), ( MTH_fn_fInverseOpt( B ) ) ) )

/* ##M MTH_M_xDivMedium( A, B ) / MTH_M_xDivHigh( A, B ) :
   A/B as MTH_tdxReal, Medium and High precision / PC.
   Both levels share one implementation : the checked function
   MTH_fn_xDivCHK() when MTH_CHECK is defined, otherwise the
   ordinary C division. */
#if defined(MTH_CHECK)
#define MTH_M_xDivMedium( A, B)         MTH_fn_xDivCHK( A, B)
#define MTH_M_xDivHigh( A, B)           MTH_fn_xDivCHK( A, B)
#else /* NOT MTH_CHECK */
#define MTH_M_xDivMedium( A, B)         ((A) / (B))
#define MTH_M_xDivHigh( A, B)           ((A) / (B))
#endif /* MTH_CHECK OR NOT */

#endif /* U64 OR PC */
 
 
 
 
 
 
    
/* ##-############################# 
   ##  INV SQRT
   ################################ */
 
/* ** U64 : */
#if defined(U64)

/* ---- U64 target : all three precision levels divide by sqrtf() ---- */

/* ##M MTH_M_xInvSqrtLow( A )    : 1/sqrt(A) as MTH_tdxReal, Low precision / U64 */
#define MTH_M_xInvSqrtLow( A )          (  MTH_C_ONE / sqrtf( A ) )

/* ##M MTH_M_xInvSqrtMedium( A ) : 1/sqrt(A) as MTH_tdxReal, Medium precision / U64 */
#define MTH_M_xInvSqrtMedium( A )       (  MTH_C_ONE / sqrtf( A ) )

/* ##M MTH_M_xInvSqrtHigh( A )   : 1/sqrt(A) as MTH_tdxReal, High precision / U64 */
#define MTH_M_xInvSqrtHigh( A )         (  MTH_C_ONE / sqrtf( A ) )

#else /* NOT U64 => PC */

/* ---- PC target ---- */

/* ##M MTH_M_xInvSqrtLow( A ) : 1/sqrt(A) as MTH_tdxReal, Low precision / PC.
   Delegates to the table-based MTH_fn_fInvSquareRootOpt(). */
#define MTH_M_xInvSqrtLow( A )          ( MTH_fn_fInvSquareRootOpt( A ) )

/* ##M MTH_M_xInvSqrtMedium( A ) / MTH_M_xInvSqrtHigh( A ) :
   1/sqrt(A) as MTH_tdxReal, Medium and High precision / PC.
   Both levels share one implementation : the checked function
   MTH_fn_xInvSqrtCHK() when MTH_CHECK is defined, otherwise MTH_C_ONE
   divided by the double precision sqrt() converted back to float. */
#if defined(MTH_CHECK)
#define MTH_M_xInvSqrtMedium( A )       MTH_fn_xInvSqrtCHK( A )
#define MTH_M_xInvSqrtHigh( A )         MTH_fn_xInvSqrtCHK( A )
#else /* NOT MTH_CHECK */
#define MTH_M_xInvSqrtMedium( A )       ( MTH_C_ONE / (float) sqrt((double) (A)) )
#define MTH_M_xInvSqrtHigh( A )         ( MTH_C_ONE / (float) sqrt((double) (A)) )
#endif /* MTH_CHECK OR NOT */

#endif /* U64 OR PC */
    
 
 
   
 
     
 
    
/* ##-############################# 
   ##  INIT
   ################################ */
 
/* ##M==================================================================================
NAME :          MTH_M_vInit
DESCRIPTION :   Library initialization.
                U64 : expands to an empty compound statement.
                PC  : expands to a compound statement calling MTH_fn_vInit();
                      the expansion is the same for MTH_LOW, MTH_MEDIUM and
                      MTH_HIGH, and the macro is left undefined when none of
                      the three levels is selected.
                NOTE : the expansion is a bare { } block, so writing
                       "if (c) MTH_M_vInit(); else ..." does not compile.
INPUT :         void
OUTPUT :        void
=======================================================================================*/
#if defined(U64)

#define MTH_M_vInit()   {}

#elif defined(MTH_LOW) || defined(MTH_MEDIUM) || defined(MTH_HIGH)

/* PC : LOW, MEDIUM and HIGH all initialise through MTH_fn_vInit() */
#define MTH_M_vInit()   {   MTH_fn_vInit();   }

#endif /* U64 OR PC */
 
 
 
 
 
 
 
 
/* --------------------------------------------------------------------------------------- */
/* ------------ This part is not public, but is here for __inline functions -------------- */
/* ------------ !!! DO NOT USE THIS DIRECTLY !!! - FOR PC ONLY --------------------------- */
/* ------------ WARNING : these functions can disrupt other algorithms ------------------- */
/* --------------------------------------------------------------------------------------- */
 
/* #define MTH_PARANOID */ /* Uncomment this to armor code */
 
/* NOT for U64 */
#ifndef U64
 
 
/* ##-########################### 
   ## Set Pentium FPU Precision
   ############################## */
 
/* ##F===================================================================================
NAME :          MTH_fn_vSet24bitFPU
DESCRIPTION :   Set Pentium FPU internal precision to 24bit.
                Reinitialises the FPU with finit, reads the FPU control word,
                clears bits 8 and 9 of it (mask 0fffffcffh) and loads it back.
                NOTE(review) : bits 8-9 are presumably the x87 precision-control
                field, 00 selecting 24-bit precision - confirm against the Intel
                manual. Because of the finit, any FPU state set up by the caller
                is presumably lost.
                When MTH_PARANOID is defined, eax is saved and restored.
INPUT :         void
OUTPUT :        void
=======================================================================================*/
INLINE void MTH_CALL  MTH_fn_vSet24bitFPU(void)
{
  long memvar; /* scratch memory the control word is stored to and reloaded from */
 
  _asm{
#ifdef MTH_PARANOID
      push eax
#endif /* MTH_PARANOID */
 
      finit                     /* reinitialise the FPU */
      fwait
      fstcw [memvar]            /* store the control word in memvar */
      fwait
      mov eax, [memvar]
      and eax, 0fffffcffh       /* clear bits 8 and 9 */
      mov [memvar], eax
      fldcw [memvar]            /* load the modified control word */
      fwait
 
#ifdef MTH_PARANOID
      pop eax
#endif /* MTH_PARANOID */
      }
}
 
/* ##F===================================================================================
NAME :          MTH_fn_vSet53bitFPU
DESCRIPTION :   Set Pentium FPU internal precision to 53bit.
                Reinitialises the FPU with finit, reads the FPU control word,
                clears bit 8 of it (mask 0fffffeffh) and loads it back.
                NOTE(review) : the result relies on the value finit leaves in
                bit 9 of the control word, since only bit 8 is changed here -
                presumably finit sets both precision-control bits; confirm
                against the Intel manual.
                When MTH_PARANOID is defined, eax is saved and restored.
INPUT :         void
OUTPUT :        void
=======================================================================================*/
INLINE void MTH_CALL MTH_fn_vSet53bitFPU(void)
{
  long memvar; /* scratch memory the control word is stored to and reloaded from */
 
  _asm{
#ifdef MTH_PARANOID
      push eax
#endif /* MTH_PARANOID */
      finit                     /* reinitialise the FPU */
      fwait
      fstcw [memvar]            /* store the control word in memvar */
      fwait
      mov eax, [memvar]
      and eax, 0fffffeffh       /* clear bit 8 only */
      mov [memvar], eax
      fldcw [memvar]            /* load the modified control word */
      fwait
 
#ifdef MTH_PARANOID
      pop eax
#endif /* MTH_PARANOID */
      }
}
 
/* ##F===================================================================================
NAME :          MTH_fn_vSet64bitFPU
DESCRIPTION :   Set Pentium FPU internal precision to 64bit.
                Reinitialises the FPU with finit, then stores and reloads the
                control word. The AND mask is 0ffffffffh, i.e. no bit is
                changed, so the precision obtained is whatever finit selects.
                NOTE(review) : presumably finit selects 64-bit precision by
                default - confirm against the Intel manual.
                When MTH_PARANOID is defined, eax is saved and restored.
INPUT :         void
OUTPUT :        void
=======================================================================================*/
INLINE void MTH_CALL MTH_fn_vSet64bitFPU(void)
{
  long memvar; /* scratch memory the control word is stored to and reloaded from */
 
  _asm{
#ifdef MTH_PARANOID
      push eax
#endif /* MTH_PARANOID */
 
      finit                     /* reinitialise the FPU */
      fwait
      fstcw [memvar]            /* store the control word in memvar */
      fwait
      mov eax, [memvar]
      and eax, 0ffffffffh       /* all-ones mask : control word left unchanged */
      mov [memvar], eax
      fldcw [memvar]            /* reload the (unchanged) control word */
      fwait
#ifdef MTH_PARANOID
      pop eax
#endif /* MTH_PARANOID */ 
      }
}
 
 
 
 
 
 
/* ##-########################### 
   ## FloatToLong 
   ############################## */
 
/* WARNING : These functions are experimental and for PC only.
             Try them, and measure performance and precision loss before making your choice.
             Both C and ASM versions are available; the C version is faster than the ASM
             version with Visual C, because the compiler can optimise it together with the
             surrounding code.
 */
 
 
extern CPA_EXPORT double MTH_gs_dDecal; /* = 3.0F*pow(2, 51); */
 
/* ##F===================================================================================
NAME :          MTH_fn_fFloat2LongRoundOptASM
DESCRIPTION :   Return long round of a real number : asm version
                Do not use this function ! MTH_fn_fFloat2LongRoundOpt is more efficient.
                Adds MTH_gs_dDecal to the value, stores the sum as a 64-bit
                double and returns the first 32 bits of that double.
                The sum is stored into a dedicated 8-byte double : storing a
                qword directly into the 4-byte long result would overwrite
                4 bytes of neighbouring stack memory.
INPUT :         a        : float
OUTPUT :        round(a) : long
=======================================================================================*/
INLINE long  MTH_fn_fFloat2LongRoundOptASM  (float f)
{
  long   res;
  double biased;    /* 8-byte destination of the qword store below */

  _asm{
    fld dword ptr f
    fadd qword ptr MTH_gs_dDecal
    fstp qword ptr biased         /* full 64-bit store, inside 'biased' */
    mov  eax, dword ptr biased    /* first dword of the sum = result    */
    mov  res, eax
  }

  return (res);
}
 
/* ##F===================================================================================
NAME :          MTH_fn_fFloat2LongRoundOpt
DESCRIPTION :   Return long round of a real number : C version (more optimized than asm)
                Examples : 7.7 return 8, -7.7 return -8
                Adds MTH_gs_dDecal to the value and returns the leading
                sizeof(long) bytes of the resulting double reinterpreted as a
                long. The reinterpretation goes through a union rather than a
                pointer cast, so the double is never read through a long
                lvalue (no aliasing violation); the bytes read are the same.
                MTH_M_vCHK (defined elsewhere) is applied to the input first.
INPUT :         a        : float
OUTPUT :        round(a) : long
=======================================================================================*/
INLINE long MTH_fn_fFloat2LongRoundOpt (double f)
{
  union { double d; long l; } pun;

  MTH_M_vCHK(f);

  f+= MTH_gs_dDecal;
  pun.d= f;           /* store the biased value as a double ...       */

  return (pun.l);     /* ... and read its leading bytes back as a long */
}
 
/* ##F===================================================================================
NAME :          MTH_fn_fFloat2LongOptASM
DESCRIPTION :   Return long truncation of a real number : asm version
                Do not use this function ! MTH_fn_fFloat2LongOpt is more efficient.
                Works on the absolute value : subtracts m (0.49999999), adds
                MTH_gs_dDecal, stores the sum as a 64-bit double and takes its
                first 32 bits; the result is negated when the sign bit of f
                was set.
                The sum is stored into a dedicated 8-byte double : storing a
                qword directly into the 4-byte long result would overwrite
                4 bytes of neighbouring stack memory.
INPUT :         a        : float
OUTPUT :        (long)(a) : long
=======================================================================================*/
INLINE long  MTH_fn_fFloat2LongOptASM  (float f)
{
  long res;
  double m=0.49999999;
  double biased;    /* 8-byte destination of the qword stores below */

  _asm{
    mov ebx, f
    test ebx, 0x80000000          /* sign bit set ? */
    jne F2L_negativ
    fld dword ptr f      
    fsub qword ptr m
    fadd qword ptr MTH_gs_dDecal
    fstp qword ptr biased         /* full 64-bit store, inside 'biased' */
    mov ebx, dword ptr biased     /* first dword of the sum = result    */
    mov res, ebx
    jmp F2L_fin

    F2L_negativ:
    and ebx, 0x7FFFFFFF           /* clear the sign bit : f = |f| */
    mov f, ebx
    fld dword ptr f      
    fsub qword ptr m
    fadd qword ptr MTH_gs_dDecal
    xor ebx, ebx
    fstp qword ptr biased         /* full 64-bit store, inside 'biased' */
    sub ebx, dword ptr biased     /* ebx = 0 - first dword = negated result */
    mov res, ebx    
    F2L_fin :
  }

  return (res);
}
 
/* ##F===================================================================================
NAME :          MTH_fn_fFloat2LongOpt
DESCRIPTION :   Return long truncation of a real number : C version (more optimized than asm)
                Examples : 7.7 return 7, -7.7 return -7
                Warning : Precision of 0.000001
                Works on the absolute value : subtracts m (0.499999), adds
                MTH_gs_dDecal and returns the leading sizeof(long) bytes of the
                resulting double reinterpreted as a long, negated when f was
                not strictly positive. The reinterpretation goes through a
                union rather than a pointer cast, so the double is never read
                through a long lvalue (no aliasing violation); the bytes read
                are the same.
                MTH_M_vCHK (defined elsewhere) is applied to the input first.
INPUT :         a        : float
OUTPUT :        (long)(a) : long
=======================================================================================*/
INLINE long  MTH_fn_fFloat2LongOpt  (double f)
{
  union { double d; long l; } pun;
  static  double m=0.499999; /* Do not add `9` unless you decrease the output domain */

  MTH_M_vCHK(f);

  if( f>0)
    {
      f-= m;
      f+= MTH_gs_dDecal; 
      pun.d= f;
      return (pun.l);
    }
  else
    {
      /* zero and negative values : convert |f|, then negate the result */
      f= -f;
      f-= m;
      f+= MTH_gs_dDecal; 
      pun.d= f;
      return (-pun.l);
    }
} 
 
 
 
 
 
 
 
 
/* ##-########################### 
   ## TABLE BASED OPTIMIZATION
   ############################## */
 
/* WARNING : These functions are experimental and for PC only.
             Performance depends on context !
             Best performance for serial calculations (fewer cache misses).
             Try them, and measure performance and precision loss before making your choice.
 */
 
extern CPA_EXPORT unsigned long MTH_g_a2048_fSquareRootTable[1024*2];
 
/* ##F===================================================================================
NAME :          MTH_fn_fSquareRootOpt
DESCRIPTION :   Return square root of a real number : optimized with table.
                Works on the raw 32-bit pattern of the float :
                 - the new exponent is ((E + (127<<23)) >> 1), masked back to
                   the exponent field;
                 - bits 13 to 23 of f (lowest exponent bit and the bits just
                   below it) are shifted right by 11 to form a byte offset
                   into MTH_g_a2048_fSquareRootTable;
                 - the table entry is added to the new exponent.
                The sign bit of f is masked out by both AND masks.
                MTH_M_vCHK (defined elsewhere) is applied to the input and to
                the result.
                When MTH_PARANOID is defined, ebx and eax are saved and restored.
INPUT :         a        : float
OUTPUT :        sqrt(a)  : float
=======================================================================================*/
INLINE float MTH_CALL MTH_fn_fSquareRootOpt(float f)
{
  float res_sqrt;
 
  MTH_M_vCHK(f);
 
  /* f= (-1)^s.2^E.[1.M] */
  _asm{
#ifdef MTH_PARANOID
        push ebx
        push eax
#endif /* MTH_PARANOID */
 
        mov  ebx,f              /* ebx = f */
        mov  eax,f              /* eax = f */
        and  ebx,0x7F800000     /* ebx = E */
 
        and  eax,0x00FFE000     /* eax = 1st bit of E & high bits of M */
 
        add  ebx,0x3F800000     /* ebx= E + (127<<23) */
        shr  ebx,1              /* ebx = ebx/2 */
 
        shr  eax,11             /* eax = byte offset in table */
 
        and  ebx,0x7F800000     /* ebx = new E */
        add  ebx,dword ptr[MTH_g_a2048_fSquareRootTable+eax]    /* Get from table */
        mov  dword ptr[res_sqrt],ebx
 
#ifdef MTH_PARANOID
        pop eax
        pop ebx
#endif /* MTH_PARANOID */
      }
 
  MTH_M_vCHK(res_sqrt);
 
  return (res_sqrt);
}
 
 
extern CPA_EXPORT unsigned long MTH_g_a1024_fInverse[1024];
 
/* ##F===================================================================================
NAME :          MTH_fn_fInverseOpt
DESCRIPTION :   Return inverse of a real number : optimized with table.
                Works on the raw 32-bit pattern of the float :
                 - the top 9 bits of f (mask 0xFF800000) are subtracted from
                   the constant 0x7E800000;
                 - bits 13 to 22 of f (mask 0x007FE000) are shifted right by 11
                   to form a byte offset into MTH_g_a1024_fInverse;
                 - the table entry is added to the difference.
                MTH_M_vCHK (defined elsewhere) is applied to the input and to
                the result.
                When MTH_PARANOID is defined, ecx, ebx and eax are saved and
                restored.
INPUT :         a    : float
OUTPUT :        1/a  : float
=======================================================================================*/
INLINE float MTH_CALL MTH_fn_fInverseOpt(float f)
{
  float res_inv;
 
  MTH_M_vCHK(f);
 
  _asm{        
#ifdef MTH_PARANOID
      push ecx
      push ebx
      push eax
#endif /* MTH_PARANOID */
 
      mov  ebx,f                                        /* ebx = f                           */
      mov  ecx,0x7E800000                               /* 1 Clocks                          */
      mov  eax,ebx                                      /* eax = f                           */
      and  eax,0x007FE000                               /* 1 Clocks                          */
      and  ebx,0xFF800000                               /* ebx = top 9 bits of f             */
      shr  eax,11                                       /* 1 Clocks                          */
      sub  ecx,ebx                                      /* ecx = 0x7E800000 - top bits       */
      add  ecx,dword ptr[MTH_g_a1024_fInverse + eax]    /* 3 Clocks     Exp_AGI_U_Pem:1      */
      mov  dword ptr[res_inv],ecx                       /* 1 Clocks     Exp_Flow_Dep_ecx     */
 
#ifdef MTH_PARANOID
      pop eax
      pop ebx
      pop ecx
#endif /* MTH_PARANOID */
      }
 
  MTH_M_vCHK(res_inv);
 
  return (res_inv);
}
 
 
extern CPA_EXPORT unsigned long MTH_g_a2048_fInvSquareRootTable[1024*2];
 
/* ##F===================================================================================
NAME :          MTH_fn_fInvSquareRootOpt
DESCRIPTION :   Return inverse square root of a real number : optimized with table.
                Works on the raw 32-bit pattern of the float :
                 - the new exponent is (((379<<23) - E) >> 1), masked back to
                   the exponent field;
                 - bits 13 to 23 of f (lowest exponent bit and the bits just
                   below it) are shifted right by 11 to form a byte offset
                   into MTH_g_a2048_fInvSquareRootTable;
                 - the table entry is added to the new exponent.
                The sign bit of f is masked out by both AND masks.
                MTH_M_vCHK (defined elsewhere) is applied to the input and to
                the result.
                When MTH_PARANOID is defined, ecx, ebx and eax are saved and
                restored.
INPUT :         a          : float
OUTPUT :        1/sqrt(a)  : float
=======================================================================================*/
INLINE float MTH_CALL MTH_fn_fInvSquareRootOpt(float f)
{
  float res_invsqrt;
  /* To test validity of this function :
  float res_high;
  char  c_test[30];
  */
 
  MTH_M_vCHK(f);
 
  /* f= (-1)^s.2^E.[1.M] */
  _asm{
#ifdef MTH_PARANOID
      push ecx
      push ebx
      push eax
#endif /* MTH_PARANOID */
 
      mov  ecx,f                /* ecx = f */                   
      mov  eax,f                /* eax = f, to allow pairing */
      and  ecx,0x7F800000       /* ecx = E */
        
      mov  ebx,0xBD800000       /* ebx= 379 << 23 */
 
      and  eax,0x00FFE000       /* 1st bit of E (odd/even) & 10 high of M */ 
 
      sub  ebx,ecx              /* ebx= (379 << 23) -E */
      shr  ebx,1                /* ebx= ebx/2 */
      
      shr  eax,11               /* eax = byte offset in table */ 
      and  ebx,0x7F800000       /* ebx = new E */
      add  ebx,dword ptr[MTH_g_a2048_fInvSquareRootTable + eax] /* Get from table */
      mov  dword ptr[res_invsqrt],ebx
#ifdef MTH_PARANOID
      pop eax
      pop ebx
      pop ecx
#endif /* MTH_PARANOID */
  }
  /* Validity test (disabled) : compare against the library computation
  res_high= 1.0F/sqrt((float)f);
  assert( abs(1.0F-res_high/res_invsqrt) <0.001 );
  */
 
  MTH_M_vCHK(res_invsqrt);
 
  return (res_invsqrt);
}
 
 
 
/* ##F===================================================================================
NAME :          MTH_fn_vInit
DESCRIPTION :   Initialize tables
INPUT :         void
OUTPUT :        void
=======================================================================================*/ 
extern CPA_EXPORT void MTH_fn_vInit( void );
 
 
/* ======================================================================================
NAME :          MTH_M_bEqualZeroAsm
DESCRIPTION :   Tests if a float number equals zero.
                Works on the raw bit pattern : the sign bit is cleared, so both
                +0 and -0 give 1; any other bit pattern gives 0.
INPUT :         A          : float
OUTPUT :        A==0	   : unsigned char (1 if zero, 0 otherwise)
Author:			Yann Le Tensorer sept 25,1998
Cycles:			4 on Pentium, 3 on Pentium II
Note:			Faster than the "normal" way for both pentium & Pentium II
				return type is unsigned char, so it is tested by visual C with "test al,al"
				The function has no return statement : the result is left in al.
				NOTE(review) : the pragma below presumably silences the compiler's
				"no return value" warning; it is not restored afterwards - confirm.
=======================================================================================*/
#pragma warning( disable : 4035 )
static __inline unsigned char __fastcall MTH_M_bEqualZeroAsm( float A) 
	{
		__asm
		{
			mov ecx,A
			mov al,0			;default return value is 0 (not equal to 0)
			and ecx,0x7FFFFFFF	;very important: clear sign flag because negative zero also compares to zero...
			cmp ecx,1			;cf is set only if ecx=0 (we compare to 1, so cf is set if ecx is strictly smaller than 1)
			adc al,0			;add carry to al: al becomes 1 only when ecx was 0
								;no need of return, default returned value is in al
		}
	}
 
/* ======================================================================================
NAME :          MTH_M_bDifferentZeroAsm
DESCRIPTION :   Tests if a float number is different from zero (neither +0.0 nor -0.0)
                by examining its 32-bit pattern with integer instructions only.
INPUT :         A          : float
OUTPUT :        A!=0	   : unsigned char (1 when A is not zero, 0 when it is)
Author:			Yann Le Tensorer sept 25,1998
Cycles:			4 on Pentium, 3 on Pentium II
Note:			Faster than the "normal" way for both Pentium & Pentium II
				return type is unsigned char, so it is tested by visual C with "test al,al"
				There is no return statement: the result is simply left in AL.
=======================================================================================*/

static __inline unsigned char __fastcall MTH_M_bDifferentZeroAsm( float A) 
	{
		/* Method: same carry trick as the equal-to-zero test, with the result inverted.
		   AL is preset to 1 and the carry (set only when the sign-masked pattern is 0)
		   is subtracted from it by "sbb al,0", giving 1 - carry.
		   NOTE: the remark next to "mov al,1" still says the default is 0; the value
		   actually loaded is 1, i.e. the default answer is "different from zero". */
		__asm
		{
			mov ecx,A
			mov al,1			;default return value is 0 (not equal to 0)
			and ecx,0x7FFFFFFF	;very important: clear sign flag because negative zero also compares to zero...
			cmp ecx,1			;cf is set only if ecx=0 (we compare to 1, so cf is set if ecx is strictly smaller then 1)
			sbb al,0			;sub carry to al
								;no need of return, default returned vaue is in al
		}
	}
 
 
/* ======================================================================================
NAME :          MTH_M_bGreaterZeroAsm
DESCRIPTION :   Tests if a float number is strictly greater than zero by examining its
                32-bit pattern with integer instructions only.
INPUT :         A		: float
OUTPUT :        A>0		: unsigned char (1 when A is greater than zero, 0 otherwise)
Author:			Yann Le Tensorer sept 25,1998
Cycles:			3 on Pentium, 2 on Pentium II
Note:			Faster than the "normal" way for both Pentium & Pentium II
				There is no return statement: the result is simply left in AL.
				The test is purely on bits: every pattern from 0x00000001 to 0x7FFFFFFF
				yields 1, which includes +infinity and NaN patterns whose sign bit is
				clear.
=======================================================================================*/

static __inline unsigned char __fastcall MTH_M_bGreaterZeroAsm( float A) 
	{
		/* Method: subtracting 1 from the bit pattern moves +0.0 (0x00000000) to
		   0xFFFFFFFF and -0.0 (0x80000000) to 0x7FFFFFFF, while the patterns
		   0x00000001..0x7FFFFFFF land on 0..0x7FFFFFFE. The unsigned compare with
		   0x7FFFFFFF therefore sets the carry flag exactly for that last group, and
		   "adc al,0" turns AL (preset to 0) into 0 + carry. */
		__asm
		{
			mov ecx,A
			mov al,0		; default is less or equal zero
			sub ecx,1		; 
			cmp ecx,0x7fffffff ;key of the algorithm: cf is set if (A-1)<0x7fffffff
			adc al,0		; add cf to al
		}
	}
 
 
/* ======================================================================================
NAME :          MTH_M_bLessEqualZeroAsm
DESCRIPTION :   Tests if a float number is less than or equal to zero by examining its
                32-bit pattern with integer instructions only.
INPUT :         A		: float
OUTPUT :        A<=0	: unsigned char (1 when A is zero or negative, 0 otherwise)
Author:			Yann Le Tensorer sept 25,1998
Cycles:			4 on Pentium, 3 on Pentium II
Note:			Faster than the "normal" way for both Pentium & Pentium II
				There is no return statement: the result is simply left in AL.
				The test is purely on bits: only the patterns 0x00000001..0x7FFFFFFF
				yield 0, so +0.0, -0.0 and every pattern with the sign bit set
				(NaN patterns included) yield 1.
=======================================================================================*/

static __inline unsigned char __fastcall MTH_M_bLessEqualZeroAsm( float A) 
	{
		/* Method: exact complement of the strictly-greater-than-zero test. The carry
		   flag is set when (pattern - 1) is below 0x7FFFFFFF, i.e. for the patterns
		   0x00000001..0x7FFFFFFF, and it is subtracted from AL (preset to 1).
		   NOTE: the remark next to "sbb al,0" says "add"; the instruction subtracts
		   the carry, giving 1 - carry. */
		__asm
		{
			mov ecx,A
			mov al,1		; default is less or equal zero
			sub ecx,1		; 
			cmp ecx,0x7fffffff ;key of the algorithm: cf is set if (A-1)<0x7fffffff
			sbb al,0		; add cf to al
		}
	}
 
/* ======================================================================================
NAME :          MTH_M_bLessZeroAsm
DESCRIPTION :   Tests if a float number is strictly less than zero by examining its
                32-bit pattern with integer instructions only.
INPUT :         A		: float
OUTPUT :        A<0		: unsigned char (1 when A is less than zero, 0 otherwise)
Author:			Yann Le Tensorer sept 25,1998
Cycles:			2 on Pentium,2 on Pentium II
Note:			Faster than the "normal" way for both Pentium & Pentium II
				There is no return statement: the result is simply left in AL.
				The test is purely on bits: every pattern from 0x80000001 upwards
				yields 1, which includes -infinity and NaN patterns whose sign bit is
				set. -0.0 (0x80000000) yields 0.
=======================================================================================*/

static __inline unsigned char __fastcall MTH_M_bLessZeroAsm( float A) 
	{
		/* Method: A is compared in place, as an unsigned 32-bit integer, with
		   0x80000001. The carry flag is set when the pattern is below that value,
		   i.e. for every pattern with the sign bit clear and for -0.0. "sbb al,0"
		   subtracts the carry from AL (preset to 1), giving 1 - carry. */
		__asm
		{
			mov al,1
			cmp A,0x80000001
			sbb al,0
		}
	}
 
/* ======================================================================================
NAME :          MTH_M_bGreaterEqualZeroAsm
DESCRIPTION :   Tests if a float number is greater than or equal to zero by examining
                its 32-bit pattern with integer instructions only.
INPUT :         A		: float
OUTPUT :        A>=0	: unsigned char (1 when A is zero or positive, 0 otherwise)
Author:			Yann Le Tensorer sept 25,1998
Cycles:			2 on Pentium,2 on Pentium II
Note:			Faster than the "normal" way for both Pentium & Pentium II
				There is no return statement: the result is simply left in AL.
				The test is purely on bits: every pattern up to 0x80000000 yields 1,
				which includes -0.0, +infinity and NaN patterns whose sign bit is
				clear.
=======================================================================================*/

static __inline unsigned char __fastcall MTH_M_bGreaterEqualZeroAsm( float A) 
	{
		/* Method: exact complement of the strictly-less-than-zero test. A is compared
		   in place, as an unsigned 32-bit integer, with 0x80000001; the carry flag is
		   set when the pattern is below that value, and "adc al,0" turns AL (preset
		   to 0) into 0 + carry. */
		__asm
		{
			mov al,0
			cmp A,0x80000001
			adc al,0
		}
	}
 
 
 
 
 
 
#endif /* NOT U64 */
 
#if defined(__cplusplus)
    }
#endif
 
#endif /* MTH_FOPT_H */