* exp() is now 3 times faster.

* sinh, cosh, tanh now use D implementations rather than the C standard library. * tan() now uses the C library if asm is unavailable.
2025-05-13 15:52:41 +03:00 · 2009-02-23 09:11:43 +00:00 · 2009-02-23 09:11:43 +00:00 · ae4fd3b9f7
commit ae4fd3b9f7
parent 4fd01b9517
1 changed files with 134 additions and 9 deletions
--- a/std/math.d
+++ b/std/math.d
@ -69,6 +69,18 @@ private import std.string;
 private import std.c.math;
 private import std.traits;
 version(GNU){
    // GDC can't actually do inline asm.
 } else version(D_InlineAsm_X86) {
    version = Naked_D_InlineAsm_X86;
 } else version(LDC) {    
    import ldc.intrinsics;
    version(X86)
    {
        version = LDC_X86;
    }
 }
 private:
 /*
@ -377,6 +389,7 @@ unittest{
 real tan(real x)
 {
    version(Naked_D_InlineAsm_X86) {
    asm
    {
        fld     x[EBP]                  ; // load theta
@ -408,7 +421,10 @@ trigerr:
    return real.nan;
 Lret:
-    ;
+    ;    
    } else {
        return stdc.math.tanl(x);
    }
 }
 unittest
@ -534,7 +550,12 @@ real atan2(real y, real x)      { return std.c.math.atan2l(y,x); }
 *      $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)0.0) $(TD no) )
 *      )
 */
-real cosh(real x)               { return std.c.math.coshl(x); }
+real cosh(real x)               {
    //  cosh = (exp(x)+exp(-x))/2.
    // The naive implementation works correctly. 
    real y = exp(x);
    return (y + 1.0/y) * 0.5;
 }
 /***********************************
 * Calculates the hyperbolic sine of x.
@ -545,7 +566,18 @@ real cosh(real x)               { return std.c.math.coshl(x); }
 *      $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)$(INFIN)) $(TD no))
 *      )
 */
-real sinh(real x)               { return std.c.math.sinhl(x); }
+real sinh(real x)
 {
    //  sinh(x) =  (exp(x)-exp(-x))/2;    
    // Very large arguments could cause an overflow, but
    // the maximum value of x for which exp(x) + exp(-x)) != exp(x)
    // is x = 0.5 * (real.mant_dig) * LN2. // = 22.1807 for real80.
    if (fabs(x) > real.mant_dig * LN2) {
        return copysign(0.5 * exp(fabs(x)), x);
    }    
    real y = expm1(x);
    return 0.5 * y / (y+1) * (y+2);
 }
 /***********************************
 * Calculates the hyperbolic tangent of x.
@ -556,11 +588,15 @@ real sinh(real x)               { return std.c.math.sinhl(x); }
 *      $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)1.0) $(TD no))
 *      )
 */
-real tanh(real x)               { return std.c.math.tanhl(x); }
+real tanh(real x)
-
+{
-//real acosh(real x)            { return std.c.math.acoshl(x); }
+    //  tanh(x) = (exp(x) - exp(-x))/(exp(x)+exp(-x))
-//real asinh(real x)            { return std.c.math.asinhl(x); }
+    if (fabs(x) > real.mant_dig * LN2) {
-//real atanh(real x)            { return std.c.math.atanhl(x); }
+        return copysign(1, x);        
    }
    real y = expm1(2*x);
    return y / (y + 2);
 }
 /***********************************
 * Calculates the inverse hyperbolic cosine of x.
@ -742,7 +778,96 @@ creal sqrt(creal z)
 *      $(TR $(TD -$(INFIN)) $(TD +0.0) )
 *      )
 */
-real exp(real x)                { return std.c.math.expl(x); }
+real exp(real x)
 {
    version(Naked_D_InlineAsm_X86) {
        /*  exp() for x87 80-bit reals, IEEE754-2008 conformant.
         * Author: Don Clugston.
         * 
         * exp(x) = 2^(rndint(y))* 2^(y-rndint(y)) where y = LN2*x.
         * The trick for high performance is to avoid the fscale(28cycles on core2),
         * frndint(19 cycles), leaving f2xm1(19 cycles) as the only slow instruction.
         * 
         * We can do frndint by using fist. BUT we can't use it for huge numbers,
         * because it will set the Invalid Operation flag is overflow or NaN occurs.
         * Fortunately, whenever this happens the result would be zero or infinity.
         * 
         * We can perform fscale by directly poking into the exponent. BUT this doesn't
         * work for the (very rare) cases where the result is subnormal. So we fall back
         * to the slow method in that case.
         */
        enum { PARAMSIZE = (real.sizeof+3)&(0xFFFF_FFFC) } // always a multiple of 4
        asm {
            naked;        
            fld real ptr [ESP+4] ; // x
            mov AX, [ESP+4+8]; // AX = exponent and sign
            sub ESP, 12+8; // Create scratch space on the stack 
            // [ESP,ESP+2] = scratchint
            // [ESP+4..+6, +8..+10, +10] = scratchreal
            // set scratchreal mantissa = 1.0
            mov dword ptr [ESP+8], 0;
            mov dword ptr [ESP+8+4], 0x80000000;
            and AX, 0x7FFF; // drop sign bit
            cmp AX, 0x401D; // avoid InvalidException in fist
            jae L_extreme;
            fldl2e;
            fmul ; // y = x*log2(e)       
            fist dword ptr [ESP]; // scratchint = rndint(y)
            fisub dword ptr [ESP]; // y - rndint(y)
            // and now set scratchreal exponent
            mov EAX, [ESP];
            add EAX, 0x3fff;
            jle short L_subnormal;
            cmp EAX,0x8000;
            jge short L_overflow;
            mov [ESP+8+8],AX;        
    L_normal:
            f2xm1;
            fld1;
            fadd ; // 2^(y-rndint(z))
            fld real ptr [ESP+8] ; // 2^rndint(y)
            add ESP,12+8;        
            fmulp ST(1), ST;
            ret PARAMSIZE;
    L_subnormal:
            // Result will be subnormal.
            // In this rare case, the simple poking method doesn't work. 
            // The speed doesn't matter, so use the slow fscale method.                
            fild dword ptr [ESP];  // scratchint
            fld1;
            fscale;
            fstp real ptr [ESP+8]; // scratchreal = 2^scratchint
            fstp ST(0),ST;         // drop scratchint
            jmp L_normal;
    L_extreme: // Extreme exponent. X is very large positive, very
            // large negative, infinity, or NaN.
            fxam;
            fstsw AX;
            test AX, 0x0400; // NaN_or_zero, but we already know x!=0 
            jz L_was_nan;  // if x is NaN, returns x
            // set scratchreal = real.min
            // squaring it will return 0, setting underflow flag
            mov word  ptr [ESP+8+8], 1;
            test AX, 0x0200;
            jnz L_waslargenegative;
    L_overflow:        
            // Set scratchreal = real.max.
            // squaring it will create infinity, and set overflow flag.
            mov word  ptr [ESP+8+8], 0x7FFE;
    L_waslargenegative:        
            fstp ST(0), ST;
            fld real ptr [ESP+8];  // load scratchreal
            fmul ST(0), ST;        // square it, to create havoc!
    L_was_nan:
            add ESP,12+8;
            ret PARAMSIZE;
        }
    } else {       
        return std.c.math.expl(x);
    }
 }
 /**********************
 * Calculates 2$(SUP x).