// Written in the D programming language.

/**
This module is a port of a growing fragment of the $(D_PARAM numeric)
header in Alexander Stepanov's $(LINK2 http://sgi.com/tech/stl,
Standard Template Library), with a few additions.

Macros:
WIKI = Phobos/StdNumeric

Author: $(WEB erdani.org, Andrei Alexandrescu), Don Clugston
*/

/*
 * Copyright (C) 2004-2006 by Digital Mars, www.digitalmars.com
 * Written by Andrei Alexandrescu, www.erdani.org
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * o The origin of this software must not be misrepresented; you must not
 *   claim that you wrote the original software. If you use this software
 *   in a product, an acknowledgment in the product documentation would be
 *   appreciated but is not required.
 * o Altered source versions must be plainly marked as such, and must not
 *   be misrepresented as being the original software.
 * o This notice may not be removed or altered from any source
 *   distribution.
 */
module std.numeric;

import std.typecons;
import std.math;
import std.traits;
import std.contracts;
import std.random;
import std.string;

version (unittest)
{
    import std.stdio;
}

/**
Implements the $(LINK2 http://tinyurl.com/2zb9yr,secant method) for
finding a root of the function $(D_PARAM f) starting from points
$(D [xn_1, xn]) (ideally close to the root). $(D_PARAM Num) may be
$(D_PARAM float), $(D_PARAM double), or $(D_PARAM real).

Example:
----
float f(float x) { return cos(x) - x*x*x; }
auto x = secantMethod!(f)(0f, 1f);
assert(approxEqual(x, 0.865474));
----
*/
template secantMethod(alias F)
{
    Num secantMethod(Num)(Num xn_1, Num xn)
    {
        auto fxn = F(xn_1), d = xn_1 - xn;
        typeof(fxn) fxn_1;
        xn = xn_1;
        while (!approxEqual(d, 0) && isfinite(d))
        {
            xn_1 = xn;
            xn -= d;
            fxn_1 = fxn;
            fxn = F(xn);
            d *= -fxn / (fxn - fxn_1);
        }
        return xn;
    }
}

unittest
{
    scope(failure) stderr.writeln("Failure testing secantMethod");
    float f(float x) { return cos(x) - x*x*x; }
    invariant x = secantMethod!(f)(0f, 1f);
    assert(approxEqual(x, 0.865474));
}

private:

// Return true if a and b have opposite signs.
bool oppositeSigns(T)(T a, T b)
{
    // Use signbit() if available, otherwise compare against zero.
    static if (is(typeof(signbit(a))))
    {
        return (signbit(a) ^ signbit(b)) != 0;
    }
    else
        return (a > 0 && b < 0) || (a < 0 && b > 0);
}

public:

/** Find a real root of a real function f(x) via bracketing.
 *
 * Given a function $(D f) and a range $(D [a..b]) such that $(D f(a))
 * and $(D f(b)) have opposite signs, returns the value of $(D x) in
 * the range which is closest to a root of $(D f(x)). If $(D f(x))
 * has more than one root in the range, one will be chosen
 * arbitrarily. If $(D f(x)) returns NaN, NaN will be returned;
 * otherwise, this algorithm is guaranteed to succeed.
 *
 * Uses an algorithm based on TOMS748, which uses inverse cubic
 * interpolation whenever possible, otherwise reverting to parabolic
 * or secant interpolation. Compared to TOMS748, this implementation
 * improves worst-case performance by a factor of more than 100, and
 * typical performance by a factor of 2. For 80-bit reals, most
 * problems require 8 to 15 calls to $(D f(x)) to achieve full machine
 * precision. The worst-case performance (pathological cases) is
 * approximately twice the number of bits.
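 *
 * Example (an illustrative sketch; the function and bracket below are
 * arbitrary, any interval whose endpoints give opposite signs of $(D f)
 * will do):
 * ----
 * real f(real x) { return x*x*x - 0.5; }  // single real root at cbrt(0.5)
 * auto root = findRoot(&f, 0.0L, 1.0L);   // f(0) < 0, f(1) > 0
 * assert(approxEqual(root, 0.7937L));
 * ----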
 *
 * References: "On Enclosing Simple Roots of Nonlinear Equations",
 * G. Alefeld, F.A. Potra, Yixun Shi, Mathematics of Computation 61,
 * pp. 733-744 (1993). Fortran code available from
 * $(WEB www.netlib.org,www.netlib.org) as algorithm TOMS748.
 */
T findRoot(T, R)(R delegate(T) f, T a, T b)
{
    auto r = findRoot(f, a, b, f(a), f(b), (T lo, T hi) { return false; });
    // Return the first value if it is smaller or NaN
    return fabs(r._2) !> fabs(r._3) ? r._0 : r._1;
}

/** Find a root of a real function f(x) by bracketing, allowing the
 * termination condition to be specified.
 *
 * Params:
 *
 * f = Function to be analyzed.
 *
 * ax = Left bound of the initial range of $(D f) known to contain the
 * root.
 *
 * bx = Right bound of the initial range of $(D f) known to contain the
 * root.
 *
 * fax = Value of $(D f(ax)).
 *
 * fbx = Value of $(D f(bx)). ($(D f(ax)) and $(D f(bx)) are commonly
 * known in advance.)
 *
 * tolerance = Defines an early termination condition. Receives the
 *             current upper and lower bounds on the root. The
 *             delegate must return $(D true) when these bounds are
 *             acceptable. If this function always returns $(D false),
 *             full machine precision will be achieved.
 *
 * Returns:
 *
 * A tuple of four elements. The first two elements bracket the root
 * (the range in $(D x)); the second two are the function values at
 * those points. If an exact root was found, the first element will
 * contain the root and the corresponding function value will be 0.
 */
Tuple!(T, T, R, R) findRoot(T, R)(R delegate(T) f, T ax, T bx, R fax, R fbx,
    bool delegate(T lo, T hi) tolerance)
in
{
    assert(ax <>= 0 && bx <>= 0, "Limits must not be NaN");
    assert(oppositeSigns(fax, fbx), "Parameters must bracket the root.");
}
body
{
    // This code is (heavily) modified from TOMS748 (www.netlib.org). Some
    // ideas were borrowed from the Boost Mathematics Library.

    T a, b, d;    // [a..b] is our current bracket. d is the third best guess.
    R fa, fb, fd; // Values of f at a, b, d.
    bool done = false; // Has a root been found?

    // Allow ax and bx to be provided in reverse order.
    if (ax <= bx) {
        a = ax; fa = fax;
        b = bx; fb = fbx;
    } else {
        a = bx; fa = fbx;
        b = ax; fb = fax;
    }

    // Test the function at point c; update the brackets accordingly.
    void bracket(T c)
    {
        T fc = f(c);
        if (fc !<> 0) { // Exact solution, or NaN
            a = c;
            fa = fc;
            d = c;
            fd = fc;
            done = true;
            return;
        }
        // Determine the new enclosing interval.
        if (oppositeSigns(fa, fc)) {
            d = b;
            fd = fb;
            b = c;
            fb = fc;
        } else {
            d = a;
            fd = fa;
            a = c;
            fa = fc;
        }
    }

    /* Perform a secant interpolation. If the result would lie on a or b, or
       if a and b differ so wildly in magnitude that the result would be
       meaningless, perform a bisection instead.
     */
    T secant_interpolate(T a, T b, T fa, T fb)
    {
        if ((((a - b) == a) && b != 0) || (a != 0 && ((b - a) == b))) {
            // Catastrophic cancellation
            if (a == 0) a = copysign(0.0L, b);
            else if (b == 0) b = copysign(0.0L, a);
            else if (oppositeSigns(a, b)) return 0;
            T c = ieeeMean(a, b);
            return c;
        }
        // Avoid overflow.
        if (b - a > T.max) return b / 2.0 + a / 2.0;
        if (fb - fa > T.max) return a - (b - a) / 2;
        T c = a - (fa / (fb - fa)) * (b - a);
        if (c == a || c == b) return (a + b) / 2;
        return c;
    }

    /* Uses 'numsteps' Newton steps to approximate the zero in [a..b] of the
       quadratic polynomial interpolating f(x) at a, b, and d.
       Returns:
         The approximate zero in [a..b] of the quadratic polynomial.
     */
    T newtonQuadratic(int numsteps)
    {
        // Find the coefficients of the quadratic polynomial.
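        //
        // The quadratic through (a, fa), (b, fb) and (d, fd), written in
        // Newton (divided difference) form, is
        //
        //     P(x) = a0 + a1*(x - a) + a2*(x - a)*(x - b)
        //
        // with a0 = fa, a1 = (fb - fa)/(b - a), and
        // a2 = ((fd - fb)/(d - b) - a1)/(d - a); each safeguarded Newton step
        // below is then c -= P(c)/P'(c), where P'(c) = a1 + a2*(2*c - a - b).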
        T a0 = fa;
        T a1 = (fb - fa)/(b - a);
        T a2 = ((fd - fb)/(d - b) - a1)/(d - a);

        // Determine the starting point of the Newton steps.
        T c = oppositeSigns(a2, fa) ? a : b;

        // Start the safeguarded Newton steps.
        for (int i = 0; i < numsteps; ++i) {
            T pc = a0 + (a1 + a2 * (c - b)) * (c - a); // P(c)
            T pdc = a1 + a2 * ((2 * c) - (a + b));     // P'(c)
            if (pdc == 0)
                return a - a0 / a1; // Zero derivative: fall back to a plain secant step from a.
            c = c - pc / pdc;
        }
        return c;
    }

    // On the first iteration we take a secant step:
    if (fa !<> 0) {
        done = true;
        b = a;
        fb = fa;
    } else if (fb !<> 0) {
        done = true;
        a = b;
        fa = fb;
    } else {
        bracket(secant_interpolate(a, b, fa, fb));
    }
    // Starting with the second iteration, higher-order interpolation can
    // be used.
    int itnum = 1;   // Iteration number
    int baditer = 1; // Number of bisections to take if an iteration is bad.
    T c, e;          // e is our fourth best guess
    R fe;
whileloop:
    while (!done && (b != nextUp(a)) && !tolerance(a, b)) {
        T a0 = a, b0 = b; // record the brackets

        // Do two higher-order (cubic or parabolic) interpolation steps.
        for (int QQ = 0; QQ < 2; ++QQ) {
            // Cubic inverse interpolation requires that
            // all four function values fa, fb, fd, and fe are distinct;
            // otherwise use quadratic interpolation.
            bool distinct = (fa != fb) && (fa != fd) && (fa != fe)
                         && (fb != fd) && (fb != fe) && (fd != fe);
            // The first time, cubic interpolation is impossible.
            if (itnum < 2) distinct = false;
            bool ok = distinct;
            if (distinct) {
                // Cubic inverse interpolation of f(x) at a, b, d, and e
                real q11 = (d - e) * fd / (fe - fd);
                real q21 = (b - d) * fb / (fd - fb);
                real q31 = (a - b) * fa / (fb - fa);
                real d21 = (b - d) * fd / (fd - fb);
                real d31 = (a - b) * fb / (fb - fa);

                real q22 = (d21 - q11) * fb / (fe - fb);
                real q32 = (d31 - q21) * fa / (fd - fa);
                real d32 = (d31 - q21) * fd / (fd - fa);
                real q33 = (d32 - q22) * fa / (fe - fa);
                c = a + (q31 + q32 + q33);
                if (c !<>= 0 || (c <= a) || (c >= b)) {
                    // DAC: If the interpolation predicts a or b, it's
                    // probable that it's the actual root. Only allow this if
                    // we're already close to the root.
                    if (c == a && a - b != a) {
                        c = nextUp(a);
                    } else if (c == b && a - b != -b) {
                        c = nextDown(b);
                    } else {
                        ok = false;
                    }
                }
            }
            if (!ok) {
                // DAC: Alefeld doesn't explain why the number of Newton steps
                // should vary.
                c = newtonQuadratic(distinct ? 3 : 2);
                if (c !<>= 0 || (c <= a) || (c >= b)) {
                    // Failure, try a secant step:
                    c = secant_interpolate(a, b, fa, fb);
                }
            }
            ++itnum;
            e = d;
            fe = fd;
            bracket(c);
            if (done || (b == nextUp(a)) || tolerance(a, b))
                break whileloop;
            if (itnum == 2)
                continue whileloop;
        }
        // Now we take a double-length secant step:
        T u;
        R fu;
        if (fabs(fa) < fabs(fb)) {
            u = a;
            fu = fa;
        } else {
            u = b;
            fu = fb;
        }
        c = u - 2 * (fu / (fb - fa)) * (b - a);
        // DAC: If the secant predicts a value equal to an endpoint, it's
        // probably false.
        if (c == a || c == b || c !<>= 0 || fabs(c - u) > (b - a) / 2) {
            if ((a - b) == a || (b - a) == b) {
                if ((a > 0 && b < 0) || (a < 0 && b > 0))
                    c = 0;
                else {
                    if (a == 0) c = ieeeMean(copysign(0.0L, b), b);
                    else if (b == 0) c = ieeeMean(copysign(0.0L, a), a);
                    else c = ieeeMean(a, b);
                }
            } else {
                c = a + (b - a) / 2;
            }
        }
        e = d;
        fe = fd;
        bracket(c);
        if (done || (b == nextUp(a)) || tolerance(a, b))
            break;

        // IMPROVE THE WORST-CASE PERFORMANCE
        // We must ensure that the bounds reduce by a factor of 2
        // (in binary space!) every iteration. If we haven't achieved this
        // yet, or if we don't yet know what the exponent is,
        // perform a binary chop.
        if ((a == 0 || b == 0
                || (fabs(a) >= 0.5 * fabs(b) && fabs(b) >= 0.5 * fabs(a)))
            && (b - a) < 0.25 * (b0 - a0)) {
            baditer = 1;
            continue;
        }
        // DAC: If this happens on consecutive iterations, we probably have a
        // pathological function. Perform a number of bisections equal to the
        // total number of consecutive bad iterations.
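        //
        // (Illustration.) ieeeMean(a, b) averages the binary representations
        // of a and b, so it lands roughly midway through the representable
        // values between them (close to the geometric mean) rather than
        // midway in value. For a bracket like [1e-30, 1e+30] the arithmetic
        // midpoint 5e+29 eliminates almost none of the representable values,
        // while ieeeMean gives a value near 1 and discards about half of
        // them; this is what keeps the worst case near two calls per bit.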
        if ((b - a) < 0.25 * (b0 - a0))
            baditer = 1;
        for (int QQ = 0; QQ < baditer; ++QQ) {
            e = d;
            fe = fd;

            T w;
            if ((a > 0 && b < 0) || (a < 0 && b > 0))
                w = 0;
            else {
                T usea = a;
                T useb = b;
                if (a == 0) usea = copysign(0.0L, b);
                else if (b == 0) useb = copysign(0.0L, a);
                w = ieeeMean(usea, useb);
            }
            bracket(w);
        }
        ++baditer;
    }
    return Tuple!(T, T, R, R)(a, b, fa, fb);
}

unittest
{
    int numProblems = 0;
    int numCalls;

    void testFindRoot(real delegate(real) f, real x1, real x2) {
        numCalls = 0;
        ++numProblems;
        assert(x1 <>= 0 && x2 <>= 0);
        auto result = findRoot(f, x1, x2, f(x1), f(x2),
            (real lo, real hi) { return false; });

        auto flo = f(result._0);
        auto fhi = f(result._1);
        if (flo != 0) {
            assert(oppositeSigns(flo, fhi));
        }
    }

    // Test functions
    real cubicfn(real x) {
        ++numCalls;
        if (x > float.max) x = float.max;
        if (x < -double.max) x = -double.max;
        // This has a single real root at -59.286543284815
        return 0.386*x*x*x + 23*x*x + 15.7*x + 525.2;
    }
    // Test a function with more than one root.
    real multisine(real x) { ++numCalls; return sin(x); }
    testFindRoot(&multisine, 6, 90);
    testFindRoot(&cubicfn, -100, 100);
    testFindRoot(&cubicfn, -double.max, real.max);

    /* Tests from the paper:
     * "On Enclosing Simple Roots of Nonlinear Equations",
     * G. Alefeld, F.A. Potra, Yixun Shi,
     * Mathematics of Computation 61, pp. 733-744 (1993).
     */
    // Parameters common to many Alefeld tests.
    int n;
    real ale_a, ale_b;

    int powercalls = 0;

    real power(real x) {
        ++powercalls;
        ++numCalls;
        return pow(x, n) + double.min;
    }
    int[] power_nvals = [3, 5, 7, 9, 19, 25];
    // The Alefeld paper states that pow(x,n) is a very poor case, where
    // bisection outperforms his method, and gives total numcalls =
    // 921 for bisection (2.4 calls per bit), 1830 for Alefeld (4.76/bit),
    // 2624 for Brent (6.8/bit)
    // ... but that is for double, not 80-bit real.
    // This poor performance seems mainly due to catastrophic cancellation,
    // which is avoided here by the use of ieeeMean().
    // I get: 231 (0.48/bit).
    // I.e., this is 10x faster on Alefeld's worst case.
    numProblems = 0;
    foreach (k; power_nvals) {
        n = k;
        testFindRoot(&power, -1, 10);
    }

    int powerProblems = numProblems;

    // Tests from the Alefeld paper

    int[9] alefeldSums;
    real alefeld0(real x) {
        ++alefeldSums[0];
        ++numCalls;
        real q = sin(x) - x/2;
        for (int i = 1; i < 20; ++i)
            q += (2*i - 5.0)*(2*i - 5.0)/((x - i*i)*(x - i*i)*(x - i*i));
        return q;
    }
    real alefeld1(real x) {
        ++numCalls;
        ++alefeldSums[1];
        return ale_a*x + exp(ale_b * x);
    }
    real alefeld2(real x) {
        ++numCalls;
        ++alefeldSums[2];
        return pow(x, n) - ale_a;
    }
    real alefeld3(real x) {
        ++numCalls;
        ++alefeldSums[3];
        return (1.0 + pow(1.0L - n, 2))*x - pow(1.0L - n*x, 2);
    }
    real alefeld4(real x) {
        ++numCalls;
        ++alefeldSums[4];
        return x*x - pow(1 - x, n);
    }
    real alefeld5(real x) {
        ++numCalls;
        ++alefeldSums[5];
        return (1 + pow(1.0L - n, 4))*x - pow(1.0L - n*x, 4);
    }
    real alefeld6(real x) {
        ++numCalls;
        ++alefeldSums[6];
        return exp(-n*x)*(x - 1.01L) + pow(x, n);
    }
    real alefeld7(real x) {
        ++numCalls;
        ++alefeldSums[7];
        return (n*x - 1)/((n - 1)*x);
    }
    numProblems = 0;
    testFindRoot(&alefeld0, PI_2, PI);
    for (n = 1; n <= 10; ++n) {
        testFindRoot(&alefeld0, n*n + 1e-9L, (n + 1)*(n + 1) - 1e-9L);
    }
    ale_a = -40; ale_b = -1;
    testFindRoot(&alefeld1, -9, 31);
    ale_a = -100; ale_b = -2;
    testFindRoot(&alefeld1, -9, 31);
    ale_a = -200; ale_b = -3;
    testFindRoot(&alefeld1, -9, 31);
    int[] nvals_3 = [1, 2, 5, 10, 15, 20];
    int[] nvals_5 = [1, 2, 4, 5, 8, 15, 20];
    int[] nvals_6 = [1, 5, 10, 15, 20];
    int[] nvals_7 = [2, 5, 15, 20];

    for (int i = 4; i < 12; i += 2) {
        n = i;
        ale_a = 0.2;
        testFindRoot(&alefeld2, 0, 5);
        ale_a = 1;
        testFindRoot(&alefeld2, 0.95, 4.05);
        testFindRoot(&alefeld2, 0, 1.5);
    }
    foreach (i; nvals_3) {
        n = i;
        testFindRoot(&alefeld3, 0, 1);
    }
    foreach (i; nvals_3) {
        n = i;
        testFindRoot(&alefeld4, 0, 1);
    }
    foreach (i; nvals_5) {
        n = i;
        testFindRoot(&alefeld5, 0, 1);
    }
    foreach (i; nvals_6) {
        n = i;
        testFindRoot(&alefeld6, 0, 1);
    }
    foreach (i; nvals_7) {
        n = i;
        testFindRoot(&alefeld7, 0.01L, 1);
    }
    real worstcase(real x) {
        ++numCalls;
        return x < 0.3*real.max ? -0.999e-3 : 1.0;
    }
    testFindRoot(&worstcase, -real.max, real.max);

/*
    int grandtotal = 0;
    foreach (calls; alefeldSums) {
        grandtotal += calls;
    }
    grandtotal -= 2*numProblems;
    printf("\nALEFELD TOTAL = %d avg = %f (alefeld avg=19.3 for double)\n",
        grandtotal, (1.0*grandtotal)/numProblems);
    powercalls -= 2*powerProblems;
    printf("POWER TOTAL = %d avg = %f ", powercalls,
        (1.0*powercalls)/powerProblems);
*/
}

/**
Caching wrapper that tabulates $(D fun) over the interval $(D [left, right))
using $(D n) equal buckets. Each bucket lazily stores a single value of
$(D fun): the harmonic mean of the function values at the bucket's ends,
located via $(D findRoot). If the relative error of that value against
either end of the bucket exceeds $(D maxError), an exception is thrown.
*/
template tabulateFixed(alias fun, uint n, real maxError, real left, real right)
{
    ReturnType!(fun) tabulateFixed(ParameterTypeTuple!(fun) arg)
    {
        alias ParameterTypeTuple!(fun)[0] num;
        static num[n] table;
        alias arg[0] x;
        enforce(left <= x && x < right);
        invariant i = cast(uint) (table.length * ((x - left) / (right - left)));
        assert(i < n);
        if (isnan(table[i])) {
            // Initialize this bucket on first use.
            auto x1 = left + i * (right - left) / n;
            auto x2 = left + (i + 1) * (right - left) / n;
            invariant y1 = fun(x1), y2 = fun(x2);
            // Harmonic mean of the endpoint values: it equalizes the relative
            // deviation from y1 and y2.
            invariant y = 2 * y1 * y2 / (y1 + y2);
            num wyda(num xx) { return fun(xx) - y; }
            auto bestX = findRoot(&wyda, x1, x2);
            table[i] = fun(bestX);
            invariant leftError = abs((table[i] - y1) / y1);
            enforce(leftError <= maxError, text(leftError, " > ", maxError));
            invariant rightError = abs((table[i] - y2) / y2);
            enforce(rightError <= maxError, text(rightError, " > ", maxError));
        }
        return table[i];
    }
}

unittest
{
    version (none) // fails unit tests
    {
        enum epsilon = 0.01;
        alias tabulateFixed!(tanh, 700, epsilon, 0.2, 3) fasttanh;
        uint testSize = 100000;
        auto rnd = Random(unpredictableSeed);
        foreach (i; 0 .. testSize) {
            invariant x = uniform(rnd, 0.2F, 3.0F);
            invariant float y = fasttanh(x), w = tanh(x);
            invariant e = abs(y - w) / w;
            //writefln("%.20f", e);
            enforce(e <= epsilon,
                text("x = ", x, ", fasttanh(x) = ", y,
                    ", tanh(x) = ", w, ", relerr = ", e));
        }
    }
}
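
// A minimal usage sketch of the six-argument findRoot overload: a
// caller-supplied tolerance delegate stops the search once the bracket is
// narrower than 1e-6. The function, interval, and tolerance here are
// arbitrary illustrations.
unittest
{
    real f(real x) { return x*x - 2; } // increasing on [1, 2], root at sqrt(2)
    auto r = findRoot(&f, 1.0L, 2.0L, f(1.0L), f(2.0L),
        (real lo, real hi) { return hi - lo < 1e-6L; });
    // r._0 and r._1 bracket the root; r._2 and r._3 are f evaluated there.
    assert(fabs(r._0 - sqrt(2.0L)) < 1e-6L);
}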