diff --git a/std/math.d b/std/math.d index 285c42730..e8765b125 100644 --- a/std/math.d +++ b/std/math.d @@ -69,6 +69,18 @@ private import std.string; private import std.c.math; private import std.traits; +version(GNU){ + // GDC can't actually do inline asm. +} else version(D_InlineAsm_X86) { + version = Naked_D_InlineAsm_X86; +} else version(LDC) { + import ldc.intrinsics; + version(X86) + { + version = LDC_X86; + } +} + private: /* @@ -377,6 +389,7 @@ unittest{ real tan(real x) { + version(Naked_D_InlineAsm_X86) { asm { fld x[EBP] ; // load theta @@ -408,7 +421,10 @@ trigerr: return real.nan; Lret: - ; + ; + } else { + return stdc.math.tanl(x); + } } unittest @@ -534,7 +550,12 @@ real atan2(real y, real x) { return std.c.math.atan2l(y,x); } * $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)0.0) $(TD no) ) * ) */ -real cosh(real x) { return std.c.math.coshl(x); } +real cosh(real x) { + // cosh = (exp(x)+exp(-x))/2. + // The naive implementation works correctly. + real y = exp(x); + return (y + 1.0/y) * 0.5; +} /*********************************** * Calculates the hyperbolic sine of x. @@ -545,7 +566,18 @@ real cosh(real x) { return std.c.math.coshl(x); } * $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)$(INFIN)) $(TD no)) * ) */ -real sinh(real x) { return std.c.math.sinhl(x); } +real sinh(real x) +{ + // sinh(x) = (exp(x)-exp(-x))/2; + // Very large arguments could cause an overflow, but + // the maximum value of x for which exp(x) + exp(-x)) != exp(x) + // is x = 0.5 * (real.mant_dig) * LN2. // = 22.1807 for real80. + if (fabs(x) > real.mant_dig * LN2) { + return copysign(0.5 * exp(fabs(x)), x); + } + real y = expm1(x); + return 0.5 * y / (y+1) * (y+2); +} /*********************************** * Calculates the hyperbolic tangent of x. @@ -556,11 +588,15 @@ real sinh(real x) { return std.c.math.sinhl(x); } * $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)1.0) $(TD no)) * ) */ -real tanh(real x) { return std.c.math.tanhl(x); } - -//real acosh(real x) { return std.c.math.acoshl(x); } -//real asinh(real x) { return std.c.math.asinhl(x); } -//real atanh(real x) { return std.c.math.atanhl(x); } +real tanh(real x) +{ + // tanh(x) = (exp(x) - exp(-x))/(exp(x)+exp(-x)) + if (fabs(x) > real.mant_dig * LN2) { + return copysign(1, x); + } + real y = expm1(2*x); + return y / (y + 2); +} /*********************************** * Calculates the inverse hyperbolic cosine of x. @@ -742,7 +778,96 @@ creal sqrt(creal z) * $(TR $(TD -$(INFIN)) $(TD +0.0) ) * ) */ -real exp(real x) { return std.c.math.expl(x); } +real exp(real x) +{ + version(Naked_D_InlineAsm_X86) { + /* exp() for x87 80-bit reals, IEEE754-2008 conformant. + * Author: Don Clugston. + * + * exp(x) = 2^(rndint(y))* 2^(y-rndint(y)) where y = LN2*x. + * The trick for high performance is to avoid the fscale(28cycles on core2), + * frndint(19 cycles), leaving f2xm1(19 cycles) as the only slow instruction. + * + * We can do frndint by using fist. BUT we can't use it for huge numbers, + * because it will set the Invalid Operation flag is overflow or NaN occurs. + * Fortunately, whenever this happens the result would be zero or infinity. + * + * We can perform fscale by directly poking into the exponent. BUT this doesn't + * work for the (very rare) cases where the result is subnormal. So we fall back + * to the slow method in that case. + */ + enum { PARAMSIZE = (real.sizeof+3)&(0xFFFF_FFFC) } // always a multiple of 4 + asm { + naked; + fld real ptr [ESP+4] ; // x + mov AX, [ESP+4+8]; // AX = exponent and sign + sub ESP, 12+8; // Create scratch space on the stack + // [ESP,ESP+2] = scratchint + // [ESP+4..+6, +8..+10, +10] = scratchreal + // set scratchreal mantissa = 1.0 + mov dword ptr [ESP+8], 0; + mov dword ptr [ESP+8+4], 0x80000000; + and AX, 0x7FFF; // drop sign bit + cmp AX, 0x401D; // avoid InvalidException in fist + jae L_extreme; + fldl2e; + fmul ; // y = x*log2(e) + fist dword ptr [ESP]; // scratchint = rndint(y) + fisub dword ptr [ESP]; // y - rndint(y) + // and now set scratchreal exponent + mov EAX, [ESP]; + add EAX, 0x3fff; + jle short L_subnormal; + cmp EAX,0x8000; + jge short L_overflow; + mov [ESP+8+8],AX; + L_normal: + f2xm1; + fld1; + fadd ; // 2^(y-rndint(z)) + fld real ptr [ESP+8] ; // 2^rndint(y) + add ESP,12+8; + fmulp ST(1), ST; + ret PARAMSIZE; + + L_subnormal: + // Result will be subnormal. + // In this rare case, the simple poking method doesn't work. + // The speed doesn't matter, so use the slow fscale method. + fild dword ptr [ESP]; // scratchint + fld1; + fscale; + fstp real ptr [ESP+8]; // scratchreal = 2^scratchint + fstp ST(0),ST; // drop scratchint + jmp L_normal; + + L_extreme: // Extreme exponent. X is very large positive, very + // large negative, infinity, or NaN. + fxam; + fstsw AX; + test AX, 0x0400; // NaN_or_zero, but we already know x!=0 + jz L_was_nan; // if x is NaN, returns x + // set scratchreal = real.min + // squaring it will return 0, setting underflow flag + mov word ptr [ESP+8+8], 1; + test AX, 0x0200; + jnz L_waslargenegative; + L_overflow: + // Set scratchreal = real.max. + // squaring it will create infinity, and set overflow flag. + mov word ptr [ESP+8+8], 0x7FFE; + L_waslargenegative: + fstp ST(0), ST; + fld real ptr [ESP+8]; // load scratchreal + fmul ST(0), ST; // square it, to create havoc! + L_was_nan: + add ESP,12+8; + ret PARAMSIZE; + } + } else { + return std.c.math.expl(x); + } +} /********************** * Calculates 2$(SUP x).