mirror of
https://github.com/dlang/phobos.git
synced 2025-05-13 15:52:41 +03:00
* exp() is now 3 times faster.
* sinh, cosh, tanh now use D implementations rather than the C standard library. * tan() now uses the C library if asm is unavailable.
This commit is contained in:
parent
4fd01b9517
commit
ae4fd3b9f7
1 changed files with 134 additions and 9 deletions
143
std/math.d
143
std/math.d
|
@ -69,6 +69,18 @@ private import std.string;
|
||||||
private import std.c.math;
|
private import std.c.math;
|
||||||
private import std.traits;
|
private import std.traits;
|
||||||
|
|
||||||
|
version(GNU){
|
||||||
|
// GDC can't actually do inline asm.
|
||||||
|
} else version(D_InlineAsm_X86) {
|
||||||
|
version = Naked_D_InlineAsm_X86;
|
||||||
|
} else version(LDC) {
|
||||||
|
import ldc.intrinsics;
|
||||||
|
version(X86)
|
||||||
|
{
|
||||||
|
version = LDC_X86;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/*
|
/*
|
||||||
|
@ -377,6 +389,7 @@ unittest{
|
||||||
|
|
||||||
real tan(real x)
|
real tan(real x)
|
||||||
{
|
{
|
||||||
|
version(Naked_D_InlineAsm_X86) {
|
||||||
asm
|
asm
|
||||||
{
|
{
|
||||||
fld x[EBP] ; // load theta
|
fld x[EBP] ; // load theta
|
||||||
|
@ -408,7 +421,10 @@ trigerr:
|
||||||
return real.nan;
|
return real.nan;
|
||||||
|
|
||||||
Lret:
|
Lret:
|
||||||
;
|
;
|
||||||
|
} else {
|
||||||
|
return stdc.math.tanl(x);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unittest
|
unittest
|
||||||
|
@ -534,7 +550,12 @@ real atan2(real y, real x) { return std.c.math.atan2l(y,x); }
|
||||||
* $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)0.0) $(TD no) )
|
* $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)0.0) $(TD no) )
|
||||||
* )
|
* )
|
||||||
*/
|
*/
|
||||||
real cosh(real x) { return std.c.math.coshl(x); }
|
real cosh(real x) {
|
||||||
|
// cosh = (exp(x)+exp(-x))/2.
|
||||||
|
// The naive implementation works correctly.
|
||||||
|
real y = exp(x);
|
||||||
|
return (y + 1.0/y) * 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
/***********************************
|
/***********************************
|
||||||
* Calculates the hyperbolic sine of x.
|
* Calculates the hyperbolic sine of x.
|
||||||
|
@ -545,7 +566,18 @@ real cosh(real x) { return std.c.math.coshl(x); }
|
||||||
* $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)$(INFIN)) $(TD no))
|
* $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)$(INFIN)) $(TD no))
|
||||||
* )
|
* )
|
||||||
*/
|
*/
|
||||||
real sinh(real x) { return std.c.math.sinhl(x); }
|
real sinh(real x)
|
||||||
|
{
|
||||||
|
// sinh(x) = (exp(x)-exp(-x))/2;
|
||||||
|
// Very large arguments could cause an overflow, but
|
||||||
|
// the maximum value of x for which exp(x) + exp(-x)) != exp(x)
|
||||||
|
// is x = 0.5 * (real.mant_dig) * LN2. // = 22.1807 for real80.
|
||||||
|
if (fabs(x) > real.mant_dig * LN2) {
|
||||||
|
return copysign(0.5 * exp(fabs(x)), x);
|
||||||
|
}
|
||||||
|
real y = expm1(x);
|
||||||
|
return 0.5 * y / (y+1) * (y+2);
|
||||||
|
}
|
||||||
|
|
||||||
/***********************************
|
/***********************************
|
||||||
* Calculates the hyperbolic tangent of x.
|
* Calculates the hyperbolic tangent of x.
|
||||||
|
@ -556,11 +588,15 @@ real sinh(real x) { return std.c.math.sinhl(x); }
|
||||||
* $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)1.0) $(TD no))
|
* $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)1.0) $(TD no))
|
||||||
* )
|
* )
|
||||||
*/
|
*/
|
||||||
real tanh(real x) { return std.c.math.tanhl(x); }
|
real tanh(real x)
|
||||||
|
{
|
||||||
//real acosh(real x) { return std.c.math.acoshl(x); }
|
// tanh(x) = (exp(x) - exp(-x))/(exp(x)+exp(-x))
|
||||||
//real asinh(real x) { return std.c.math.asinhl(x); }
|
if (fabs(x) > real.mant_dig * LN2) {
|
||||||
//real atanh(real x) { return std.c.math.atanhl(x); }
|
return copysign(1, x);
|
||||||
|
}
|
||||||
|
real y = expm1(2*x);
|
||||||
|
return y / (y + 2);
|
||||||
|
}
|
||||||
|
|
||||||
/***********************************
|
/***********************************
|
||||||
* Calculates the inverse hyperbolic cosine of x.
|
* Calculates the inverse hyperbolic cosine of x.
|
||||||
|
@ -742,7 +778,96 @@ creal sqrt(creal z)
|
||||||
* $(TR $(TD -$(INFIN)) $(TD +0.0) )
|
* $(TR $(TD -$(INFIN)) $(TD +0.0) )
|
||||||
* )
|
* )
|
||||||
*/
|
*/
|
||||||
real exp(real x) { return std.c.math.expl(x); }
|
real exp(real x)
|
||||||
|
{
|
||||||
|
version(Naked_D_InlineAsm_X86) {
|
||||||
|
/* exp() for x87 80-bit reals, IEEE754-2008 conformant.
|
||||||
|
* Author: Don Clugston.
|
||||||
|
*
|
||||||
|
* exp(x) = 2^(rndint(y))* 2^(y-rndint(y)) where y = LN2*x.
|
||||||
|
* The trick for high performance is to avoid the fscale(28cycles on core2),
|
||||||
|
* frndint(19 cycles), leaving f2xm1(19 cycles) as the only slow instruction.
|
||||||
|
*
|
||||||
|
* We can do frndint by using fist. BUT we can't use it for huge numbers,
|
||||||
|
* because it will set the Invalid Operation flag is overflow or NaN occurs.
|
||||||
|
* Fortunately, whenever this happens the result would be zero or infinity.
|
||||||
|
*
|
||||||
|
* We can perform fscale by directly poking into the exponent. BUT this doesn't
|
||||||
|
* work for the (very rare) cases where the result is subnormal. So we fall back
|
||||||
|
* to the slow method in that case.
|
||||||
|
*/
|
||||||
|
enum { PARAMSIZE = (real.sizeof+3)&(0xFFFF_FFFC) } // always a multiple of 4
|
||||||
|
asm {
|
||||||
|
naked;
|
||||||
|
fld real ptr [ESP+4] ; // x
|
||||||
|
mov AX, [ESP+4+8]; // AX = exponent and sign
|
||||||
|
sub ESP, 12+8; // Create scratch space on the stack
|
||||||
|
// [ESP,ESP+2] = scratchint
|
||||||
|
// [ESP+4..+6, +8..+10, +10] = scratchreal
|
||||||
|
// set scratchreal mantissa = 1.0
|
||||||
|
mov dword ptr [ESP+8], 0;
|
||||||
|
mov dword ptr [ESP+8+4], 0x80000000;
|
||||||
|
and AX, 0x7FFF; // drop sign bit
|
||||||
|
cmp AX, 0x401D; // avoid InvalidException in fist
|
||||||
|
jae L_extreme;
|
||||||
|
fldl2e;
|
||||||
|
fmul ; // y = x*log2(e)
|
||||||
|
fist dword ptr [ESP]; // scratchint = rndint(y)
|
||||||
|
fisub dword ptr [ESP]; // y - rndint(y)
|
||||||
|
// and now set scratchreal exponent
|
||||||
|
mov EAX, [ESP];
|
||||||
|
add EAX, 0x3fff;
|
||||||
|
jle short L_subnormal;
|
||||||
|
cmp EAX,0x8000;
|
||||||
|
jge short L_overflow;
|
||||||
|
mov [ESP+8+8],AX;
|
||||||
|
L_normal:
|
||||||
|
f2xm1;
|
||||||
|
fld1;
|
||||||
|
fadd ; // 2^(y-rndint(z))
|
||||||
|
fld real ptr [ESP+8] ; // 2^rndint(y)
|
||||||
|
add ESP,12+8;
|
||||||
|
fmulp ST(1), ST;
|
||||||
|
ret PARAMSIZE;
|
||||||
|
|
||||||
|
L_subnormal:
|
||||||
|
// Result will be subnormal.
|
||||||
|
// In this rare case, the simple poking method doesn't work.
|
||||||
|
// The speed doesn't matter, so use the slow fscale method.
|
||||||
|
fild dword ptr [ESP]; // scratchint
|
||||||
|
fld1;
|
||||||
|
fscale;
|
||||||
|
fstp real ptr [ESP+8]; // scratchreal = 2^scratchint
|
||||||
|
fstp ST(0),ST; // drop scratchint
|
||||||
|
jmp L_normal;
|
||||||
|
|
||||||
|
L_extreme: // Extreme exponent. X is very large positive, very
|
||||||
|
// large negative, infinity, or NaN.
|
||||||
|
fxam;
|
||||||
|
fstsw AX;
|
||||||
|
test AX, 0x0400; // NaN_or_zero, but we already know x!=0
|
||||||
|
jz L_was_nan; // if x is NaN, returns x
|
||||||
|
// set scratchreal = real.min
|
||||||
|
// squaring it will return 0, setting underflow flag
|
||||||
|
mov word ptr [ESP+8+8], 1;
|
||||||
|
test AX, 0x0200;
|
||||||
|
jnz L_waslargenegative;
|
||||||
|
L_overflow:
|
||||||
|
// Set scratchreal = real.max.
|
||||||
|
// squaring it will create infinity, and set overflow flag.
|
||||||
|
mov word ptr [ESP+8+8], 0x7FFE;
|
||||||
|
L_waslargenegative:
|
||||||
|
fstp ST(0), ST;
|
||||||
|
fld real ptr [ESP+8]; // load scratchreal
|
||||||
|
fmul ST(0), ST; // square it, to create havoc!
|
||||||
|
L_was_nan:
|
||||||
|
add ESP,12+8;
|
||||||
|
ret PARAMSIZE;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return std.c.math.expl(x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**********************
|
/**********************
|
||||||
* Calculates 2$(SUP x).
|
* Calculates 2$(SUP x).
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue