mirror of
https://github.com/dlang/phobos.git
synced 2025-05-10 14:08:32 +03:00
* exp() is now 3 times faster.
* sinh, cosh, tanh now use D implementations rather than the C standard library. * tan() now uses the C library if asm is unavailable.
This commit is contained in:
parent
4fd01b9517
commit
ae4fd3b9f7
1 changed files with 134 additions and 9 deletions
143
std/math.d
143
std/math.d
|
@ -69,6 +69,18 @@ private import std.string;
|
|||
private import std.c.math;
|
||||
private import std.traits;
|
||||
|
||||
version(GNU){
|
||||
// GDC can't actually do inline asm.
|
||||
} else version(D_InlineAsm_X86) {
|
||||
version = Naked_D_InlineAsm_X86;
|
||||
} else version(LDC) {
|
||||
import ldc.intrinsics;
|
||||
version(X86)
|
||||
{
|
||||
version = LDC_X86;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
/*
|
||||
|
@ -377,6 +389,7 @@ unittest{
|
|||
|
||||
real tan(real x)
|
||||
{
|
||||
version(Naked_D_InlineAsm_X86) {
|
||||
asm
|
||||
{
|
||||
fld x[EBP] ; // load theta
|
||||
|
@ -408,7 +421,10 @@ trigerr:
|
|||
return real.nan;
|
||||
|
||||
Lret:
|
||||
;
|
||||
;
|
||||
} else {
|
||||
return stdc.math.tanl(x);
|
||||
}
|
||||
}
|
||||
|
||||
unittest
|
||||
|
@ -534,7 +550,12 @@ real atan2(real y, real x) { return std.c.math.atan2l(y,x); }
|
|||
* $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)0.0) $(TD no) )
|
||||
* )
|
||||
*/
|
||||
real cosh(real x) { return std.c.math.coshl(x); }
|
||||
real cosh(real x) {
|
||||
// cosh = (exp(x)+exp(-x))/2.
|
||||
// The naive implementation works correctly.
|
||||
real y = exp(x);
|
||||
return (y + 1.0/y) * 0.5;
|
||||
}
|
||||
|
||||
/***********************************
|
||||
* Calculates the hyperbolic sine of x.
|
||||
|
@ -545,7 +566,18 @@ real cosh(real x) { return std.c.math.coshl(x); }
|
|||
* $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)$(INFIN)) $(TD no))
|
||||
* )
|
||||
*/
|
||||
real sinh(real x) { return std.c.math.sinhl(x); }
|
||||
real sinh(real x)
|
||||
{
|
||||
// sinh(x) = (exp(x)-exp(-x))/2;
|
||||
// Very large arguments could cause an overflow, but
|
||||
// the maximum value of x for which exp(x) + exp(-x)) != exp(x)
|
||||
// is x = 0.5 * (real.mant_dig) * LN2. // = 22.1807 for real80.
|
||||
if (fabs(x) > real.mant_dig * LN2) {
|
||||
return copysign(0.5 * exp(fabs(x)), x);
|
||||
}
|
||||
real y = expm1(x);
|
||||
return 0.5 * y / (y+1) * (y+2);
|
||||
}
|
||||
|
||||
/***********************************
|
||||
* Calculates the hyperbolic tangent of x.
|
||||
|
@ -556,11 +588,15 @@ real sinh(real x) { return std.c.math.sinhl(x); }
|
|||
* $(TR $(TD $(PLUSMN)$(INFIN)) $(TD $(PLUSMN)1.0) $(TD no))
|
||||
* )
|
||||
*/
|
||||
real tanh(real x) { return std.c.math.tanhl(x); }
|
||||
|
||||
//real acosh(real x) { return std.c.math.acoshl(x); }
|
||||
//real asinh(real x) { return std.c.math.asinhl(x); }
|
||||
//real atanh(real x) { return std.c.math.atanhl(x); }
|
||||
real tanh(real x)
|
||||
{
|
||||
// tanh(x) = (exp(x) - exp(-x))/(exp(x)+exp(-x))
|
||||
if (fabs(x) > real.mant_dig * LN2) {
|
||||
return copysign(1, x);
|
||||
}
|
||||
real y = expm1(2*x);
|
||||
return y / (y + 2);
|
||||
}
|
||||
|
||||
/***********************************
|
||||
* Calculates the inverse hyperbolic cosine of x.
|
||||
|
@ -742,7 +778,96 @@ creal sqrt(creal z)
|
|||
* $(TR $(TD -$(INFIN)) $(TD +0.0) )
|
||||
* )
|
||||
*/
|
||||
real exp(real x) { return std.c.math.expl(x); }
|
||||
real exp(real x)
|
||||
{
|
||||
version(Naked_D_InlineAsm_X86) {
|
||||
/* exp() for x87 80-bit reals, IEEE754-2008 conformant.
|
||||
* Author: Don Clugston.
|
||||
*
|
||||
* exp(x) = 2^(rndint(y))* 2^(y-rndint(y)) where y = LN2*x.
|
||||
* The trick for high performance is to avoid the fscale(28cycles on core2),
|
||||
* frndint(19 cycles), leaving f2xm1(19 cycles) as the only slow instruction.
|
||||
*
|
||||
* We can do frndint by using fist. BUT we can't use it for huge numbers,
|
||||
* because it will set the Invalid Operation flag is overflow or NaN occurs.
|
||||
* Fortunately, whenever this happens the result would be zero or infinity.
|
||||
*
|
||||
* We can perform fscale by directly poking into the exponent. BUT this doesn't
|
||||
* work for the (very rare) cases where the result is subnormal. So we fall back
|
||||
* to the slow method in that case.
|
||||
*/
|
||||
enum { PARAMSIZE = (real.sizeof+3)&(0xFFFF_FFFC) } // always a multiple of 4
|
||||
asm {
|
||||
naked;
|
||||
fld real ptr [ESP+4] ; // x
|
||||
mov AX, [ESP+4+8]; // AX = exponent and sign
|
||||
sub ESP, 12+8; // Create scratch space on the stack
|
||||
// [ESP,ESP+2] = scratchint
|
||||
// [ESP+4..+6, +8..+10, +10] = scratchreal
|
||||
// set scratchreal mantissa = 1.0
|
||||
mov dword ptr [ESP+8], 0;
|
||||
mov dword ptr [ESP+8+4], 0x80000000;
|
||||
and AX, 0x7FFF; // drop sign bit
|
||||
cmp AX, 0x401D; // avoid InvalidException in fist
|
||||
jae L_extreme;
|
||||
fldl2e;
|
||||
fmul ; // y = x*log2(e)
|
||||
fist dword ptr [ESP]; // scratchint = rndint(y)
|
||||
fisub dword ptr [ESP]; // y - rndint(y)
|
||||
// and now set scratchreal exponent
|
||||
mov EAX, [ESP];
|
||||
add EAX, 0x3fff;
|
||||
jle short L_subnormal;
|
||||
cmp EAX,0x8000;
|
||||
jge short L_overflow;
|
||||
mov [ESP+8+8],AX;
|
||||
L_normal:
|
||||
f2xm1;
|
||||
fld1;
|
||||
fadd ; // 2^(y-rndint(z))
|
||||
fld real ptr [ESP+8] ; // 2^rndint(y)
|
||||
add ESP,12+8;
|
||||
fmulp ST(1), ST;
|
||||
ret PARAMSIZE;
|
||||
|
||||
L_subnormal:
|
||||
// Result will be subnormal.
|
||||
// In this rare case, the simple poking method doesn't work.
|
||||
// The speed doesn't matter, so use the slow fscale method.
|
||||
fild dword ptr [ESP]; // scratchint
|
||||
fld1;
|
||||
fscale;
|
||||
fstp real ptr [ESP+8]; // scratchreal = 2^scratchint
|
||||
fstp ST(0),ST; // drop scratchint
|
||||
jmp L_normal;
|
||||
|
||||
L_extreme: // Extreme exponent. X is very large positive, very
|
||||
// large negative, infinity, or NaN.
|
||||
fxam;
|
||||
fstsw AX;
|
||||
test AX, 0x0400; // NaN_or_zero, but we already know x!=0
|
||||
jz L_was_nan; // if x is NaN, returns x
|
||||
// set scratchreal = real.min
|
||||
// squaring it will return 0, setting underflow flag
|
||||
mov word ptr [ESP+8+8], 1;
|
||||
test AX, 0x0200;
|
||||
jnz L_waslargenegative;
|
||||
L_overflow:
|
||||
// Set scratchreal = real.max.
|
||||
// squaring it will create infinity, and set overflow flag.
|
||||
mov word ptr [ESP+8+8], 0x7FFE;
|
||||
L_waslargenegative:
|
||||
fstp ST(0), ST;
|
||||
fld real ptr [ESP+8]; // load scratchreal
|
||||
fmul ST(0), ST; // square it, to create havoc!
|
||||
L_was_nan:
|
||||
add ESP,12+8;
|
||||
ret PARAMSIZE;
|
||||
}
|
||||
} else {
|
||||
return std.c.math.expl(x);
|
||||
}
|
||||
}
|
||||
|
||||
/**********************
|
||||
* Calculates 2$(SUP x).
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue