* exp() is now 3 times faster.

* sinh, cosh, tanh now use D implementations rather than the C standard library. * tan() now uses the C library if asm is unavailable.
2025-05-10 14:08:32 +03:00 · 2009-02-23 09:11:43 +00:00 · 2009-02-23 09:11:43 +00:00 · ae4fd3b9f7
commit ae4fd3b9f7
parent 4fd01b9517
1 changed files with 134 additions and 9 deletions
--- a/std/math.d
+++ b/std/math.d
@ -69,6 +69,18 @@ private import std.string;
 private import std.c.math;
 private import std.traits;

+version(GNU){
+    // GDC can't actually do inline asm.
+} else version(D_InlineAsm_X86) {
+    version = Naked_D_InlineAsm_X86;
+} else version(LDC) {    
+    import ldc.intrinsics;
+    version(X86)
+    {
+        version = LDC_X86;
+    }
+}
+

 private:
 /*
@ -377,6 +389,7 @@ unittest{

 real tan(real x)
 {
+    version(Naked_D_InlineAsm_X86) {
    asm
    {
        fld     x[EBP]                  ; // load theta
@ -408,7 +421,10 @@ trigerr:
    return real.nan;

 Lret:
-    ;
+    ;    
+    } else {
+        return stdc.math.tanl(x);
+    }
 }

 unittest
@ -534,7 +550,12 @@ real atan2(real y, real x)      { return std.c.math.atan2l(y,x); }
 *      $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)0.0) $(TD no) )
 *      )
 */
-real cosh(real x)               { return std.c.math.coshl(x); }
+real cosh(real x)               {
+    //  cosh = (exp(x)+exp(-x))/2.
+    // The naive implementation works correctly. 
+    real y = exp(x);
+    return (y + 1.0/y) * 0.5;
+}

 /***********************************
 * Calculates the hyperbolic sine of x.
@ -545,7 +566,18 @@ real cosh(real x)               { return std.c.math.coshl(x); }
 *      $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)$(INFIN)) $(TD no))
 *      )
 */
-real sinh(real x)               { return std.c.math.sinhl(x); }
+real sinh(real x)
+{
+    //  sinh(x) =  (exp(x)-exp(-x))/2;    
+    // Very large arguments could cause an overflow, but
+    // the maximum value of x for which exp(x) + exp(-x)) != exp(x)
+    // is x = 0.5 * (real.mant_dig) * LN2. // = 22.1807 for real80.
+    if (fabs(x) > real.mant_dig * LN2) {
+        return copysign(0.5 * exp(fabs(x)), x);
+    }    
+    real y = expm1(x);
+    return 0.5 * y / (y+1) * (y+2);
+}

 /***********************************
 * Calculates the hyperbolic tangent of x.
@ -556,11 +588,15 @@ real sinh(real x)               { return std.c.math.sinhl(x); }
 *      $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)1.0) $(TD no))
 *      )
 */
-real tanh(real x)               { return std.c.math.tanhl(x); }
-
-//real acosh(real x)            { return std.c.math.acoshl(x); }
-//real asinh(real x)            { return std.c.math.asinhl(x); }
-//real atanh(real x)            { return std.c.math.atanhl(x); }
+real tanh(real x)
+{
+    //  tanh(x) = (exp(x) - exp(-x))/(exp(x)+exp(-x))
+    if (fabs(x) > real.mant_dig * LN2) {
+        return copysign(1, x);        
+    }
+    real y = expm1(2*x);
+    return y / (y + 2);
+}

 /***********************************
 * Calculates the inverse hyperbolic cosine of x.
@ -742,7 +778,96 @@ creal sqrt(creal z)
 *      $(TR $(TD -$(INFIN)) $(TD +0.0) )
 *      )
 */
-real exp(real x)                { return std.c.math.expl(x); }
+real exp(real x)
+{
+    version(Naked_D_InlineAsm_X86) {
+        /*  exp() for x87 80-bit reals, IEEE754-2008 conformant.
+         * Author: Don Clugston.
+         * 
+         * exp(x) = 2^(rndint(y))* 2^(y-rndint(y)) where y = LN2*x.
+         * The trick for high performance is to avoid the fscale(28cycles on core2),
+         * frndint(19 cycles), leaving f2xm1(19 cycles) as the only slow instruction.
+         * 
+         * We can do frndint by using fist. BUT we can't use it for huge numbers,
+         * because it will set the Invalid Operation flag is overflow or NaN occurs.
+         * Fortunately, whenever this happens the result would be zero or infinity.
+         * 
+         * We can perform fscale by directly poking into the exponent. BUT this doesn't
+         * work for the (very rare) cases where the result is subnormal. So we fall back
+         * to the slow method in that case.
+         */
+        enum { PARAMSIZE = (real.sizeof+3)&(0xFFFF_FFFC) } // always a multiple of 4
+        asm {
+            naked;        
+            fld real ptr [ESP+4] ; // x
+            mov AX, [ESP+4+8]; // AX = exponent and sign
+            sub ESP, 12+8; // Create scratch space on the stack 
+            // [ESP,ESP+2] = scratchint
+            // [ESP+4..+6, +8..+10, +10] = scratchreal
+            // set scratchreal mantissa = 1.0
+            mov dword ptr [ESP+8], 0;
+            mov dword ptr [ESP+8+4], 0x80000000;
+            and AX, 0x7FFF; // drop sign bit
+            cmp AX, 0x401D; // avoid InvalidException in fist
+            jae L_extreme;
+            fldl2e;
+            fmul ; // y = x*log2(e)       
+            fist dword ptr [ESP]; // scratchint = rndint(y)
+            fisub dword ptr [ESP]; // y - rndint(y)
+            // and now set scratchreal exponent
+            mov EAX, [ESP];
+            add EAX, 0x3fff;
+            jle short L_subnormal;
+            cmp EAX,0x8000;
+            jge short L_overflow;
+            mov [ESP+8+8],AX;        
+    L_normal:
+            f2xm1;
+            fld1;
+            fadd ; // 2^(y-rndint(z))
+            fld real ptr [ESP+8] ; // 2^rndint(y)
+            add ESP,12+8;        
+            fmulp ST(1), ST;
+            ret PARAMSIZE;
+
+    L_subnormal:
+            // Result will be subnormal.
+            // In this rare case, the simple poking method doesn't work. 
+            // The speed doesn't matter, so use the slow fscale method.                
+            fild dword ptr [ESP];  // scratchint
+            fld1;
+            fscale;
+            fstp real ptr [ESP+8]; // scratchreal = 2^scratchint
+            fstp ST(0),ST;         // drop scratchint
+            jmp L_normal;
+            
+    L_extreme: // Extreme exponent. X is very large positive, very
+            // large negative, infinity, or NaN.
+            fxam;
+            fstsw AX;
+            test AX, 0x0400; // NaN_or_zero, but we already know x!=0 
+            jz L_was_nan;  // if x is NaN, returns x
+            // set scratchreal = real.min
+            // squaring it will return 0, setting underflow flag
+            mov word  ptr [ESP+8+8], 1;
+            test AX, 0x0200;
+            jnz L_waslargenegative;
+    L_overflow:        
+            // Set scratchreal = real.max.
+            // squaring it will create infinity, and set overflow flag.
+            mov word  ptr [ESP+8+8], 0x7FFE;
+    L_waslargenegative:        
+            fstp ST(0), ST;
+            fld real ptr [ESP+8];  // load scratchreal
+            fmul ST(0), ST;        // square it, to create havoc!
+    L_was_nan:
+            add ESP,12+8;
+            ret PARAMSIZE;
+        }
+    } else {       
+        return std.c.math.expl(x);
+    }
+}

 /**********************
 * Calculates 2$(SUP x).