mirror of
https://github.com/dlang/phobos.git
synced 2025-04-28 22:21:09 +03:00
1783 lines
56 KiB
D
1783 lines
56 KiB
D
// Written in the D programming language.
|
|
|
|
/**
|
|
This module is a port of a growing fragment of the $(D_PARAM numeric)
|
|
header in Alexander Stepanov's $(LINK2 http://sgi.com/tech/stl,
|
|
Standard Template Library), with a few additions.
|
|
|
|
Macros:
|
|
|
|
WIKI = Phobos/StdNumeric
|
|
|
|
Authors:
|
|
|
|
$(WEB erdani.org, Andrei Alexandrescu), Don Clugston
|
|
*/
|
|
|
|
module std.numeric;
|
|
import std.algorithm;
|
|
import std.array;
|
|
import std.bitmanip;
|
|
import std.typecons;
|
|
import std.math;
|
|
import std.traits;
|
|
import std.contracts;
|
|
import std.random;
|
|
import std.string;
|
|
import std.range;
|
|
import std.c.stdlib;
|
|
import std.functional;
|
|
import std.typetuple;
|
|
version(unittest)
|
|
{
|
|
import std.stdio;
|
|
import std.conv;
|
|
}
|
|
|
|
/**
|
|
Flags for custom floating-point formats. In the case of $(LUCKY
|
|
IEEE754) types, all of these flags are applied.
|
|
*/
|
|
private enum CustomFloatFlags
|
|
{
|
|
/**
|
|
Store values in normalized form by default, i.e. the fraction is
|
|
assumed to be $(D 1.nnnn), not $(D 0.nnnn). Normalization is the
|
|
default in all $(LUCKY IEE754) types. If a floating-point type is
|
|
defined to store only numbers inside $(D (-1, 1)), normalization may
|
|
not be useful.
|
|
*/
|
|
storeNormalized = 1,
|
|
/**
|
|
Allow denormalized values (a la $(LUCKY IEEE754 denormalized)).
|
|
If this flag is on, a value with an exponent
|
|
containing all bits zero is considered to contain a denormalized
|
|
value.
|
|
*/
|
|
allowDenorm = 2,
|
|
/**
|
|
Support for infinity values (a la $(LUCKY IEEE754 _infinity)). If this
|
|
flag is on, a value with an exponent containing all bits one and
|
|
fraction zero is assumed to contain a (valid) allowDenormalized value. The sign,
|
|
if applicable, is decided by the sign bit.
|
|
*/
|
|
infinity = 4,
|
|
/**
|
|
Support for NaN (Not a Number) values (a la $(LUCKY IEEE754 Not a Number)).
|
|
If this flag is on, a value with an exponent containing all
|
|
bits one and a nonzero fraction is considered to contain a
|
|
allowDenormalized value.
|
|
*/
|
|
nan = 8,
|
|
/**
|
|
Include _all of the above options.
|
|
*/
|
|
all = storeNormalized | allowDenorm | infinity | nan
|
|
}
|
|
|
|
/**
|
|
Allows user code to define custom floating-point formats. These
|
|
formats are for storage only; to do operations on custom
|
|
floating-point types, one must extract the value into a $(D float) or
|
|
$(D double). After the operation is completed the result can be
|
|
deposited in a custom floating-point value by using its assignment
|
|
operator.
|
|
|
|
Example:
|
|
----
|
|
// Define a 16-bit floating point type
|
|
alias CustomFloat!(1, 5, 10) HalfFloat;
|
|
auto x = HalfFloat(0.125);
|
|
assert(halfFloat.get!float == 0.125);
|
|
----
|
|
*/
|
|
struct CustomFloat(
|
|
bool signBit, // allocate a sign bit? (true for float)
|
|
uint fractionBits, // fraction bits (23 for float)
|
|
uint exponentBits, // exponent bits (8 for float)
|
|
uint bias = (1u << (exponentBits - 1)) - 1, // bias (127 for float)
|
|
CustomFloatFlags flags = CustomFloatFlags.all)
|
|
{
|
|
alias CustomFloatFlags Flags;
|
|
private enum totalSize = signBit + fractionBits + exponentBits;
|
|
static assert(totalSize % 8 == 0);
|
|
private mixin(bitfields!(
|
|
bool, "sign", signBit,
|
|
uint, "fraction", fractionBits,
|
|
uint, "exponent", exponentBits));
|
|
|
|
/**
|
|
Initialize from a $(D float).
|
|
*/
|
|
this(float input) { this = input; }
|
|
/**
|
|
Initialize from a $(D double).
|
|
*/
|
|
this(double input) { this = input; }
|
|
/**
|
|
Assigns from either a $(D float) or a $(D double).
|
|
*/
|
|
void opAssign(F)(F input) if (indexOf!(Unqual!F, float, double) >= 0)
|
|
{
|
|
static if (is(Unqual!F == float))
|
|
auto value = FloatRep(input);
|
|
else
|
|
auto value = DoubleRep(input);
|
|
// Assign the sign bit
|
|
static if (!signBit) enforce(!value.sign);
|
|
else sign = value.sign;
|
|
|
|
// Assign the exponent and fraction
|
|
auto e = value.exponent;
|
|
if (e == 0)
|
|
{
|
|
// denormalized source value
|
|
static if (flags & Flags.allowDenorm)
|
|
{
|
|
exponent = 0;
|
|
fraction = cast(typeof(fraction_max)) value.fraction;
|
|
}
|
|
else
|
|
{
|
|
assert(0);
|
|
}
|
|
}
|
|
else if (e == value.exponent_max)
|
|
{
|
|
// infinity or NaN
|
|
if (value.fraction == 0)
|
|
// Infinity
|
|
static if (flags & Flags.infinity)
|
|
{
|
|
fraction = 0;
|
|
exponent = exponent_max;
|
|
}
|
|
else
|
|
{
|
|
assert(0);
|
|
}
|
|
else
|
|
{
|
|
// NaN
|
|
static if (flags & Flags.nan)
|
|
{
|
|
fraction = cast(typeof(fraction())) value.fraction;
|
|
exponent = exponent_max;
|
|
}
|
|
else
|
|
{
|
|
assert(0);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// normal value
|
|
exponent = cast(typeof(exponent_max))
|
|
(value.exponent + (bias - value.bias));
|
|
static if (fractionBits >= value.fractionBits)
|
|
{
|
|
fraction = cast(typeof(fraction_max))
|
|
(value.fraction << (fractionBits - value.fractionBits));
|
|
}
|
|
else
|
|
{
|
|
fraction = cast(typeof(fraction_max))
|
|
(value.fraction >> (value.fractionBits - fractionBits));
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
Fetches the stored value either as a $(D float) or $(D double).
|
|
*/
|
|
F get(F)()
|
|
{
|
|
static if (is(Unqual!F == float))
|
|
FloatRep result = void;
|
|
else
|
|
DoubleRep result = void;
|
|
static if (signBit) result.sign = sign;
|
|
else result.sign = 0;
|
|
auto e = exponent;
|
|
if (e == 0)
|
|
{
|
|
// denormalized value
|
|
result.exponent = 0;
|
|
result.fraction = fraction;
|
|
}
|
|
else if (e == exponent_max)
|
|
{
|
|
// infinity or NaN
|
|
}
|
|
else
|
|
{
|
|
// normal value
|
|
result.exponent = cast(typeof(result.exponent_max))
|
|
(exponent + (result.bias - bias));
|
|
static if (fractionBits <= result.fractionBits)
|
|
{
|
|
alias Select!(result.fractionBits >= 32, ulong, uint) S;
|
|
result.fraction = cast(typeof(result.fraction_max))
|
|
(cast(S) fraction << (result.fractionBits - fractionBits));
|
|
}
|
|
else
|
|
{
|
|
result.fraction = cast(typeof(result.fraction()))
|
|
(fraction >> (fractionBits - result.fractionBits));
|
|
}
|
|
}
|
|
return result.value;
|
|
}
|
|
}
|
|
|
|
unittest
|
|
{
|
|
alias TypeTuple!(
|
|
CustomFloat!(1, 5, 10),
|
|
CustomFloat!(0, 5, 11),
|
|
CustomFloat!(0, 1, 15)
|
|
) FPTypes;
|
|
foreach (F; FPTypes)
|
|
{
|
|
auto x = F(0.125);
|
|
assert(x.get!float == 0.125F);
|
|
assert(x.get!double == 0.125);
|
|
}
|
|
}
|
|
|
|
/**
|
|
Defines the fastest type to use when storing temporaries of a
|
|
calculation intended to ultimately yield a result of type $(D F)
|
|
(where $(D F) must be one of $(D float), $(D double), or $(D
|
|
real)). When doing a multi-step computation, you may want to store
|
|
intermediate results as $(D FPTemporary!F).
|
|
|
|
Example:
|
|
----
|
|
// Average numbers in an array
|
|
double avg(in double[] a)
|
|
{
|
|
if (a.length == 0) return 0;
|
|
FPTemporary!double result = 0;
|
|
foreach (e; a) result += e;
|
|
return result / a.length;
|
|
}
|
|
----
|
|
|
|
The necessity of $(D FPTemporary) stems from the optimized
|
|
floating-point operations and registers present in virtually all
|
|
processors. When adding numbers in the example above, the addition may
|
|
in fact be done in $(D real) precision internally. In that case,
|
|
storing the intermediate $(D result) in $(D double format) is not only
|
|
less precise, it is also (surprisingly) slower, because a conversion
|
|
from $(D real) to $(D double) is performed every pass through the
|
|
loop. This being a lose-lose situation, $(D FPTemporary!F) has been
|
|
defined as the $(I fastest) type to use for calculations at precision
|
|
$(D F). There is no need to define a type for the $(I most accurate)
|
|
calculations, as that is always $(D real).
|
|
|
|
Finally, there is no guarantee that using $(D FPTemporary!F) will
|
|
always be fastest, as the speed of floating-point calculations depends
|
|
on very many factors.
|
|
*/
|
|
template FPTemporary(F) if (isFloatingPoint!F)
|
|
{
|
|
alias real FPTemporary;
|
|
}
|
|
|
|
/**
|
|
Implements the $(WEB tinyurl.com/2zb9yr, secant method) for finding a
|
|
root of the function $(D fun) starting from points $(D [xn_1, x_n])
|
|
(ideally close to the root). $(D Num) may be $(D float), $(D double),
|
|
or $(D real).
|
|
|
|
Example:
|
|
|
|
----
|
|
float f(float x) {
|
|
return cos(x) - x*x*x;
|
|
}
|
|
auto x = secantMethod!(f)(0f, 1f);
|
|
assert(approxEqual(x, 0.865474));
|
|
----
|
|
*/
|
|
template secantMethod(alias fun)
|
|
{
|
|
Num secantMethod(Num)(Num xn_1, Num xn) {
|
|
auto fxn = unaryFun!(fun)(xn_1), d = xn_1 - xn;
|
|
typeof(fxn) fxn_1;
|
|
xn = xn_1;
|
|
while (!approxEqual(d, 0) && isfinite(d)) {
|
|
xn_1 = xn;
|
|
xn -= d;
|
|
fxn_1 = fxn;
|
|
fxn = unaryFun!(fun)(xn);
|
|
d *= -fxn / (fxn - fxn_1);
|
|
}
|
|
return xn;
|
|
}
|
|
}
|
|
|
|
unittest
|
|
{
|
|
scope(failure) stderr.writeln("Failure testing secantMethod");
|
|
float f(float x) {
|
|
return cos(x) - x*x*x;
|
|
}
|
|
invariant x = secantMethod!(f)(0f, 1f);
|
|
assert(approxEqual(x, 0.865474));
|
|
auto d = &f;
|
|
invariant y = secantMethod!(d)(0f, 1f);
|
|
assert(approxEqual(y, 0.865474));
|
|
}
|
|
|
|
|
|
private:
|
|
// Return true if a and b have opposite sign.
|
|
bool oppositeSigns(T)(T a, T b)
|
|
{
|
|
return signbit(a) != signbit(b);
|
|
}
|
|
|
|
public:
|
|
|
|
/** Find a real root of a real function f(x) via bracketing.
|
|
*
|
|
* Given a function $(D f) and a range $(D [a..b]) such that $(D f(a))
|
|
* and $(D f(b)) have opposite signs, returns the value of $(D x) in
|
|
* the range which is closest to a root of $(D f(x)). If $(D f(x))
|
|
* has more than one root in the range, one will be chosen
|
|
* arbitrarily. If $(D f(x)) returns NaN, NaN will be returned;
|
|
* otherwise, this algorithm is guaranteed to succeed.
|
|
*
|
|
* Uses an algorithm based on TOMS748, which uses inverse cubic
|
|
* interpolation whenever possible, otherwise reverting to parabolic
|
|
* or secant interpolation. Compared to TOMS748, this implementation
|
|
* improves worst-case performance by a factor of more than 100, and
|
|
* typical performance by a factor of 2. For 80-bit reals, most
|
|
* problems require 8 to 15 calls to $(D f(x)) to achieve full machine
|
|
* precision. The worst-case performance (pathological cases) is
|
|
* approximately twice the number of bits.
|
|
*
|
|
* References: "On Enclosing Simple Roots of Nonlinear Equations",
|
|
* G. Alefeld, F.A. Potra, Yixun Shi, Mathematics of Computation 61,
|
|
* pp733-744 (1993). Fortran code available from $(WEB
|
|
* www.netlib.org,www.netlib.org) as algorithm TOMS478.
|
|
*
|
|
*/
|
|
T findRoot(T, R)(R delegate(T) f, T a, T b)
|
|
{
|
|
auto r = findRoot(f, a, b, f(a), f(b), (T lo, T hi){ return false; });
|
|
// Return the first value if it is smaller or NaN
|
|
return fabs(r.field[2]) !> fabs(r.field[3]) ? r.field[0] : r.field[1];
|
|
}
|
|
|
|
/** Find root of a real function f(x) by bracketing, allowing the
|
|
* termination condition to be specified.
|
|
*
|
|
* Params:
|
|
*
|
|
* f = Function to be analyzed
|
|
*
|
|
* ax = Left bound of initial range of $(D f) known to contain the
|
|
* root.
|
|
*
|
|
* bx = Right bound of initial range of $(D f) known to contain the
|
|
* root.
|
|
*
|
|
* fax = Value of $(D f(ax)).
|
|
*
|
|
* fbx = Value of $(D f(bx)). ($(D f(ax)) and $(D f(bx)) are commonly
|
|
* known in advance.)
|
|
*
|
|
*
|
|
* tolerance = Defines an early termination condition. Receives the
|
|
* current upper and lower bounds on the root. The
|
|
* delegate must return $(D true) when these bounds are
|
|
* acceptable. If this function always returns $(D false),
|
|
* full machine precision will be achieved.
|
|
*
|
|
* Returns:
|
|
*
|
|
* A tuple consisting of two ranges. The first two elements are the
|
|
* range (in $(D x)) of the root, while the second pair of elements
|
|
* are the corresponding function values at those points. If an exact
|
|
* root was found, both of the first two elements will contain the
|
|
* root, and the second pair of elements will be 0.
|
|
*/
|
|
Tuple!(T, T, R, R) findRoot(T,R)(R delegate(T) f, T ax, T bx, R fax, R fbx,
|
|
bool delegate(T lo, T hi) tolerance)
|
|
in {
|
|
assert(ax<>=0 && bx<>=0, "Limits must not be NaN");
|
|
assert(signbit(fax) != signbit(fbx), "Parameters must bracket the root.");
|
|
}
|
|
body {
|
|
// This code is (heavily) modified from TOMS748 (www.netlib.org). Some ideas
|
|
// were borrowed from the Boost Mathematics Library.
|
|
|
|
T a, b, d; // [a..b] is our current bracket. d is the third best guess.
|
|
R fa, fb, fd; // Values of f at a, b, d.
|
|
bool done = false; // Has a root been found?
|
|
|
|
// Allow ax and bx to be provided in reverse order
|
|
if (ax <= bx) {
|
|
a = ax; fa = fax;
|
|
b = bx; fb = fbx;
|
|
} else {
|
|
a = bx; fa = fbx;
|
|
b = ax; fb = fax;
|
|
}
|
|
|
|
// Test the function at point c; update brackets accordingly
|
|
void bracket(T c)
|
|
{
|
|
T fc = f(c);
|
|
if (fc !<> 0) { // Exact solution, or NaN
|
|
a = c;
|
|
fa = fc;
|
|
d = c;
|
|
fd = fc;
|
|
done = true;
|
|
return;
|
|
}
|
|
// Determine new enclosing interval
|
|
if (signbit(fa) != signbit(fc)) {
|
|
d = b;
|
|
fd = fb;
|
|
b = c;
|
|
fb = fc;
|
|
} else {
|
|
d = a;
|
|
fd = fa;
|
|
a = c;
|
|
fa = fc;
|
|
}
|
|
}
|
|
|
|
/* Perform a secant interpolation. If the result would lie on a or b, or if
|
|
a and b differ so wildly in magnitude that the result would be meaningless,
|
|
perform a bisection instead.
|
|
*/
|
|
T secant_interpolate(T a, T b, T fa, T fb)
|
|
{
|
|
if (( ((a - b) == a) && b!=0) || (a!=0 && ((b - a) == b))) {
|
|
// Catastrophic cancellation
|
|
if (a == 0) a = copysign(0.0L, b);
|
|
else if (b == 0) b = copysign(0.0L, a);
|
|
else if (signbit(a) != signbit(b)) return 0;
|
|
T c = ieeeMean(a, b);
|
|
return c;
|
|
}
|
|
// avoid overflow
|
|
if (b - a > T.max) return b / 2.0 + a / 2.0;
|
|
if (fb - fa > T.max) return a - (b - a) / 2;
|
|
T c = a - (fa / (fb - fa)) * (b - a);
|
|
if (c == a || c == b) return (a + b) / 2;
|
|
return c;
|
|
}
|
|
|
|
/* Uses 'numsteps' newton steps to approximate the zero in [a..b] of the
|
|
quadratic polynomial interpolating f(x) at a, b, and d.
|
|
Returns:
|
|
The approximate zero in [a..b] of the quadratic polynomial.
|
|
*/
|
|
T newtonQuadratic(int numsteps)
|
|
{
|
|
// Find the coefficients of the quadratic polynomial.
|
|
T a0 = fa;
|
|
T a1 = (fb - fa)/(b - a);
|
|
T a2 = ((fd - fb)/(d - b) - a1)/(d - a);
|
|
|
|
// Determine the starting point of newton steps.
|
|
T c = oppositeSigns(a2, fa) ? a : b;
|
|
|
|
// start the safeguarded newton steps.
|
|
for (int i = 0; i<numsteps; ++i) {
|
|
T pc = a0 + (a1 + a2 * (c - b))*(c - a);
|
|
T pdc = a1 + a2*((2.0 * c) - (a + b));
|
|
if (pdc == 0) return a - a0 / a1;
|
|
else c = c - pc / pdc;
|
|
}
|
|
return c;
|
|
}
|
|
|
|
// On the first iteration we take a secant step:
|
|
if (fa !<> 0) {
|
|
done = true;
|
|
b = a;
|
|
fb = fa;
|
|
} else if (fb !<> 0) {
|
|
done = true;
|
|
a = b;
|
|
fa = fb;
|
|
} else {
|
|
bracket(secant_interpolate(a, b, fa, fb));
|
|
}
|
|
// Starting with the second iteration, higher-order interpolation can
|
|
// be used.
|
|
int itnum = 1; // Iteration number
|
|
int baditer = 1; // Num bisections to take if an iteration is bad.
|
|
T c, e; // e is our fourth best guess
|
|
R fe;
|
|
whileloop:
|
|
while(!done && (b != nextUp(a)) && !tolerance(a, b)) {
|
|
T a0 = a, b0 = b; // record the brackets
|
|
|
|
// Do two higher-order (cubic or parabolic) interpolation steps.
|
|
for (int QQ = 0; QQ < 2; ++QQ) {
|
|
// Cubic inverse interpolation requires that
|
|
// all four function values fa, fb, fd, and fe are distinct;
|
|
// otherwise use quadratic interpolation.
|
|
bool distinct = (fa != fb) && (fa != fd) && (fa != fe)
|
|
&& (fb != fd) && (fb != fe) && (fd != fe);
|
|
// The first time, cubic interpolation is impossible.
|
|
if (itnum<2) distinct = false;
|
|
bool ok = distinct;
|
|
if (distinct) {
|
|
// Cubic inverse interpolation of f(x) at a, b, d, and e
|
|
real q11 = (d - e) * fd / (fe - fd);
|
|
real q21 = (b - d) * fb / (fd - fb);
|
|
real q31 = (a - b) * fa / (fb - fa);
|
|
real d21 = (b - d) * fd / (fd - fb);
|
|
real d31 = (a - b) * fb / (fb - fa);
|
|
|
|
real q22 = (d21 - q11) * fb / (fe - fb);
|
|
real q32 = (d31 - q21) * fa / (fd - fa);
|
|
real d32 = (d31 - q21) * fd / (fd - fa);
|
|
real q33 = (d32 - q22) * fa / (fe - fa);
|
|
c = a + (q31 + q32 + q33);
|
|
if (c!<>=0 || (c <= a) || (c >= b)) {
|
|
// DAC: If the interpolation predicts a or b, it's
|
|
// probable that it's the actual root. Only allow this if
|
|
// we're already close to the root.
|
|
if (c == a && a - b != a) {
|
|
c = nextUp(a);
|
|
}
|
|
else if (c == b && a - b != -b) {
|
|
c = nextDown(b);
|
|
} else {
|
|
ok = false;
|
|
}
|
|
}
|
|
}
|
|
if (!ok) {
|
|
// DAC: Alefeld doesn't explain why the number of newton steps
|
|
// should vary.
|
|
c = newtonQuadratic(distinct ? 3 : 2);
|
|
if(c!<>=0 || (c <= a) || (c >= b)) {
|
|
// Failure, try a secant step:
|
|
c = secant_interpolate(a, b, fa, fb);
|
|
}
|
|
}
|
|
++itnum;
|
|
e = d;
|
|
fe = fd;
|
|
bracket(c);
|
|
if( done || ( b == nextUp(a)) || tolerance(a, b))
|
|
break whileloop;
|
|
if (itnum == 2)
|
|
continue whileloop;
|
|
}
|
|
// Now we take a double-length secant step:
|
|
T u;
|
|
R fu;
|
|
if(fabs(fa) < fabs(fb)) {
|
|
u = a;
|
|
fu = fa;
|
|
} else {
|
|
u = b;
|
|
fu = fb;
|
|
}
|
|
c = u - 2 * (fu / (fb - fa)) * (b - a);
|
|
// DAC: If the secant predicts a value equal to an endpoint, it's
|
|
// probably false.
|
|
if(c==a || c==b || c!<>=0 || fabs(c - u) > (b - a) / 2) {
|
|
if ((a-b) == a || (b-a) == b) {
|
|
if ( (a>0 && b<0) || (a<0 && b>0) ) c = 0;
|
|
else {
|
|
if (a==0) c = ieeeMean(copysign(0.0L, b), b);
|
|
else if (b==0) c = ieeeMean(copysign(0.0L, a), a);
|
|
else c = ieeeMean(a, b);
|
|
}
|
|
} else {
|
|
c = a + (b - a) / 2;
|
|
}
|
|
}
|
|
e = d;
|
|
fe = fd;
|
|
bracket(c);
|
|
if(done || (b == nextUp(a)) || tolerance(a, b))
|
|
break;
|
|
|
|
// IMPROVE THE WORST-CASE PERFORMANCE
|
|
// We must ensure that the bounds reduce by a factor of 2
|
|
// in binary space! every iteration. If we haven't achieved this
|
|
// yet, or if we don't yet know what the exponent is,
|
|
// perform a binary chop.
|
|
|
|
if( (a==0 || b==0 ||
|
|
(fabs(a) >= 0.5 * fabs(b) && fabs(b) >= 0.5 * fabs(a)))
|
|
&& (b - a) < 0.25 * (b0 - a0)) {
|
|
baditer = 1;
|
|
continue;
|
|
}
|
|
// DAC: If this happens on consecutive iterations, we probably have a
|
|
// pathological function. Perform a number of bisections equal to the
|
|
// total number of consecutive bad iterations.
|
|
|
|
if ((b - a) < 0.25 * (b0 - a0)) baditer = 1;
|
|
for (int QQ = 0; QQ < baditer ;++QQ) {
|
|
e = d;
|
|
fe = fd;
|
|
|
|
T w;
|
|
if ((a>0 && b<0) ||(a<0 && b>0)) w = 0;
|
|
else {
|
|
T usea = a;
|
|
T useb = b;
|
|
if (a == 0) usea = copysign(0.0L, b);
|
|
else if (b == 0) useb = copysign(0.0L, a);
|
|
w = ieeeMean(usea, useb);
|
|
}
|
|
bracket(w);
|
|
}
|
|
++baditer;
|
|
}
|
|
return Tuple!(T, T, R, R)(a, b, fa, fb);
|
|
}
|
|
|
|
unittest
|
|
{
|
|
int numProblems = 0;
|
|
int numCalls;
|
|
|
|
void testFindRoot(real delegate(real) f, real x1, real x2) {
|
|
numCalls=0;
|
|
++numProblems;
|
|
assert(x1<>=0 && x2<>=0);
|
|
auto result = findRoot(f, x1, x2, f(x1), f(x2),
|
|
(real lo, real hi) { return false; });
|
|
|
|
auto flo = f(result.field[0]);
|
|
auto fhi = f(result.field[1]);
|
|
if (flo!=0) {
|
|
assert(oppositeSigns(flo, fhi));
|
|
}
|
|
}
|
|
|
|
// Test functions
|
|
real cubicfn (real x) {
|
|
++numCalls;
|
|
if (x>float.max) x = float.max;
|
|
if (x<-double.max) x = -double.max;
|
|
// This has a single real root at -59.286543284815
|
|
return 0.386*x*x*x + 23*x*x + 15.7*x + 525.2;
|
|
}
|
|
// Test a function with more than one root.
|
|
real multisine(real x) { ++numCalls; return sin(x); }
|
|
testFindRoot( &multisine, 6, 90);
|
|
testFindRoot(&cubicfn, -100, 100);
|
|
testFindRoot( &cubicfn, -double.max, real.max);
|
|
|
|
|
|
/* Tests from the paper:
|
|
* "On Enclosing Simple Roots of Nonlinear Equations", G. Alefeld, F.A. Potra,
|
|
* Yixun Shi, Mathematics of Computation 61, pp733-744 (1993).
|
|
*/
|
|
// Parameters common to many alefeld tests.
|
|
int n;
|
|
real ale_a, ale_b;
|
|
|
|
int powercalls = 0;
|
|
|
|
real power(real x) {
|
|
++powercalls;
|
|
++numCalls;
|
|
return pow(x, n) + double.min;
|
|
}
|
|
int [] power_nvals = [3, 5, 7, 9, 19, 25];
|
|
// Alefeld paper states that pow(x,n) is a very poor case, where bisection
|
|
// outperforms his method, and gives total numcalls =
|
|
// 921 for bisection (2.4 calls per bit), 1830 for Alefeld (4.76/bit),
|
|
// 2624 for brent (6.8/bit)
|
|
// ... but that is for double, not real80.
|
|
// This poor performance seems mainly due to catastrophic cancellation,
|
|
// which is avoided here by the use of ieeeMean().
|
|
// I get: 231 (0.48/bit).
|
|
// IE this is 10X faster in Alefeld's worst case
|
|
numProblems=0;
|
|
foreach(k; power_nvals) {
|
|
n = k;
|
|
testFindRoot(&power, -1, 10);
|
|
}
|
|
|
|
int powerProblems = numProblems;
|
|
|
|
// Tests from Alefeld paper
|
|
|
|
int [9] alefeldSums;
|
|
real alefeld0(real x){
|
|
++alefeldSums[0];
|
|
++numCalls;
|
|
real q = sin(x) - x/2;
|
|
for (int i=1; i<20; ++i)
|
|
q+=(2*i-5.0)*(2*i-5.0)/((x-i*i)*(x-i*i)*(x-i*i));
|
|
return q;
|
|
}
|
|
real alefeld1(real x) {
|
|
++numCalls;
|
|
++alefeldSums[1];
|
|
return ale_a*x + exp(ale_b * x);
|
|
}
|
|
real alefeld2(real x) {
|
|
++numCalls;
|
|
++alefeldSums[2];
|
|
return pow(x, n) - ale_a;
|
|
}
|
|
real alefeld3(real x) {
|
|
++numCalls;
|
|
++alefeldSums[3];
|
|
return (1.0 +pow(1.0L-n, 2))*x - pow(1.0L-n*x, 2);
|
|
}
|
|
real alefeld4(real x) {
|
|
++numCalls;
|
|
++alefeldSums[4];
|
|
return x*x - pow(1-x, n);
|
|
}
|
|
|
|
real alefeld5(real x) {
|
|
++numCalls;
|
|
++alefeldSums[5];
|
|
return (1+pow(1.0L-n, 4))*x - pow(1.0L-n*x, 4);
|
|
}
|
|
|
|
real alefeld6(real x) {
|
|
++numCalls;
|
|
++alefeldSums[6];
|
|
return exp(-n*x)*(x-1.01L) + pow(x, n);
|
|
}
|
|
|
|
real alefeld7(real x) {
|
|
++numCalls;
|
|
++alefeldSums[7];
|
|
return (n*x-1)/((n-1)*x);
|
|
}
|
|
numProblems=0;
|
|
testFindRoot(&alefeld0, PI_2, PI);
|
|
for (n=1; n<=10; ++n) {
|
|
testFindRoot(&alefeld0, n*n+1e-9L, (n+1)*(n+1)-1e-9L);
|
|
}
|
|
ale_a = -40; ale_b = -1;
|
|
testFindRoot(&alefeld1, -9, 31);
|
|
ale_a = -100; ale_b = -2;
|
|
testFindRoot(&alefeld1, -9, 31);
|
|
ale_a = -200; ale_b = -3;
|
|
testFindRoot(&alefeld1, -9, 31);
|
|
int [] nvals_3 = [1, 2, 5, 10, 15, 20];
|
|
int [] nvals_5 = [1, 2, 4, 5, 8, 15, 20];
|
|
int [] nvals_6 = [1, 5, 10, 15, 20];
|
|
int [] nvals_7 = [2, 5, 15, 20];
|
|
|
|
for(int i=4; i<12; i+=2) {
|
|
n = i;
|
|
ale_a = 0.2;
|
|
testFindRoot(&alefeld2, 0, 5);
|
|
ale_a=1;
|
|
testFindRoot(&alefeld2, 0.95, 4.05);
|
|
testFindRoot(&alefeld2, 0, 1.5);
|
|
}
|
|
foreach(i; nvals_3) {
|
|
n=i;
|
|
testFindRoot(&alefeld3, 0, 1);
|
|
}
|
|
foreach(i; nvals_3) {
|
|
n=i;
|
|
testFindRoot(&alefeld4, 0, 1);
|
|
}
|
|
foreach(i; nvals_5) {
|
|
n=i;
|
|
testFindRoot(&alefeld5, 0, 1);
|
|
}
|
|
foreach(i; nvals_6) {
|
|
n=i;
|
|
testFindRoot(&alefeld6, 0, 1);
|
|
}
|
|
foreach(i; nvals_7) {
|
|
n=i;
|
|
testFindRoot(&alefeld7, 0.01L, 1);
|
|
}
|
|
real worstcase(real x) { ++numCalls;
|
|
return x<0.3*real.max? -0.999e-3 : 1.0;
|
|
}
|
|
testFindRoot(&worstcase, -real.max, real.max);
|
|
|
|
/*
|
|
int grandtotal=0;
|
|
foreach(calls; alefeldSums) {
|
|
grandtotal+=calls;
|
|
}
|
|
grandtotal-=2*numProblems;
|
|
printf("\nALEFELD TOTAL = %d avg = %f (alefeld avg=19.3 for double)\n",
|
|
grandtotal, (1.0*grandtotal)/numProblems);
|
|
powercalls -= 2*powerProblems;
|
|
printf("POWER TOTAL = %d avg = %f ", powercalls,
|
|
(1.0*powercalls)/powerProblems);
|
|
*/
|
|
}
|
|
|
|
/**
|
|
Computes $(LUCKY Euclidean distance) between input ranges $(D a) and
|
|
$(D b). The two ranges must have the same length. The three-parameter
|
|
version stops computation as soon as the distance is greater than or
|
|
equal to $(D limit) (this is useful to save computation if a small
|
|
distance is sought).
|
|
*/
|
|
CommonType!(ElementType!(Range1), ElementType!(Range2))
|
|
euclideanDistance(Range1, Range2)(Range1 a, Range2 b)
|
|
if (isInputRange!(Range1) && isInputRange!(Range2))
|
|
{
|
|
enum bool haveLen = hasLength!(Range1) && hasLength!(Range2);
|
|
static if (haveLen) enforce(a.length == b.length);
|
|
typeof(return) result = 0;
|
|
for (; !a.empty; a.popFront, b.popFront)
|
|
{
|
|
auto t = a.front - b.front;
|
|
result += t * t;
|
|
}
|
|
static if (!haveLen) enforce(b.empty);
|
|
return sqrt(result);
|
|
}
|
|
|
|
/// Ditto
|
|
CommonType!(ElementType!(Range1), ElementType!(Range2))
|
|
euclideanDistance(Range1, Range2, F)(Range1 a, Range2 b, F limit)
|
|
if (isInputRange!(Range1) && isInputRange!(Range2))
|
|
{
|
|
limit *= limit;
|
|
enum bool haveLen = hasLength!(Range1) && hasLength!(Range2);
|
|
static if (haveLen) enforce(a.length == b.length);
|
|
typeof(return) result = 0;
|
|
for (; ; a.popFront, b.popFront)
|
|
{
|
|
if (a.empty)
|
|
{
|
|
static if (!haveLen) enforce(b.empty);
|
|
break;
|
|
}
|
|
auto t = a.front - b.front;
|
|
result += t * t;
|
|
if (result >= limit) break;
|
|
}
|
|
return sqrt(result);
|
|
}
|
|
|
|
unittest
|
|
{
|
|
double[] a = [ 1., 2., ];
|
|
double[] b = [ 4., 6., ];
|
|
assert(euclideanDistance(a, b) == 5);
|
|
assert(euclideanDistance(a, b, 5) == 5);
|
|
assert(euclideanDistance(a, b, 4) == 5);
|
|
assert(euclideanDistance(a, b, 2) == 3);
|
|
}
|
|
|
|
/**
|
|
Computes the $(LUCKY dot product) of input ranges $(D a) and $(D
|
|
b). The two ranges must have the same length. If both ranges define
|
|
length, the check is done once; otherwise, it is done at each
|
|
iteration.
|
|
*/
|
|
CommonType!(ElementType!(Range1), ElementType!(Range2))
|
|
dotProduct(Range1, Range2)(Range1 a, Range2 b)
|
|
if (isInputRange!(Range1) && isInputRange!(Range2) &&
|
|
!(isArray!(Range1) && isArray!(Range2)))
|
|
{
|
|
enum bool haveLen = hasLength!(Range1) && hasLength!(Range2);
|
|
static if (haveLen) enforce(a.length == b.length);
|
|
typeof(return) result = 0;
|
|
for (; !a.empty; a.popFront, b.popFront)
|
|
{
|
|
result += a.front * b.front;
|
|
}
|
|
static if (!haveLen) enforce(b.empty);
|
|
return result;
|
|
}
|
|
|
|
/// Ditto
|
|
Unqual!(CommonType!(F1, F2))
|
|
dotProduct(F1, F2)(in F1[] avector, in F2[] bvector)
|
|
{
|
|
invariant n = avector.length;
|
|
assert(n == bvector.length);
|
|
auto avec = avector.ptr, bvec = bvector.ptr;
|
|
typeof(return) sum0 = 0.0, sum1 = 0.0;
|
|
|
|
const all_endp = avec + n;
|
|
const smallblock_endp = avec + (n & ~3);
|
|
const bigblock_endp = avec + (n & ~15);
|
|
|
|
for (; avec != bigblock_endp; avec += 16, bvec += 16)
|
|
{
|
|
sum0 += avec[0] * bvec[0];
|
|
sum1 += avec[1] * bvec[1];
|
|
sum0 += avec[2] * bvec[2];
|
|
sum1 += avec[3] * bvec[3];
|
|
sum0 += avec[4] * bvec[4];
|
|
sum1 += avec[5] * bvec[5];
|
|
sum0 += avec[6] * bvec[6];
|
|
sum1 += avec[7] * bvec[7];
|
|
sum0 += avec[8] * bvec[8];
|
|
sum1 += avec[9] * bvec[9];
|
|
sum0 += avec[10] * bvec[10];
|
|
sum1 += avec[11] * bvec[11];
|
|
sum0 += avec[12] * bvec[12];
|
|
sum1 += avec[13] * bvec[13];
|
|
sum0 += avec[14] * bvec[14];
|
|
sum1 += avec[15] * bvec[15];
|
|
}
|
|
|
|
for (; avec != smallblock_endp; avec += 4, bvec += 4) {
|
|
sum0 += avec[0] * bvec[0];
|
|
sum1 += avec[1] * bvec[1];
|
|
sum0 += avec[2] * bvec[2];
|
|
sum1 += avec[3] * bvec[3];
|
|
}
|
|
|
|
sum0 += sum1;
|
|
|
|
/* Do trailing portion in naive loop. */
|
|
while (avec != all_endp)
|
|
sum0 += (*avec++) * (*bvec++);
|
|
|
|
return sum0;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
double[] a = [ 1., 2., ];
|
|
double[] b = [ 4., 6., ];
|
|
assert(dotProduct(a, b) == 16);
|
|
}
|
|
|
|
/**
|
|
Computes the $(LUCKY cosine similarity) of input ranges $(D a) and $(D
|
|
b). The two ranges must have the same length. If both ranges define
|
|
length, the check is done once; otherwise, it is done at each
|
|
iteration. If either range has all-zero elements, return 0.
|
|
*/
|
|
CommonType!(ElementType!(Range1), ElementType!(Range2))
|
|
cosineSimilarity(Range1, Range2)(Range1 a, Range2 b)
|
|
if (isInputRange!(Range1) && isInputRange!(Range2))
|
|
{
|
|
enum bool haveLen = hasLength!(Range1) && hasLength!(Range2);
|
|
static if (haveLen) enforce(a.length == b.length);
|
|
FPTemporary!(typeof(return)) norma = 0, normb = 0, dotprod = 0;
|
|
for (; !a.empty; a.popFront, b.popFront)
|
|
{
|
|
immutable t1 = a.front, t2 = b.front;
|
|
norma += t1 * t1;
|
|
normb += t2 * t2;
|
|
dotprod += t1 * t2;
|
|
}
|
|
static if (!haveLen) enforce(b.empty);
|
|
if (norma == 0 || normb == 0) return 0;
|
|
return dotprod / sqrt(norma * normb);
|
|
}
|
|
|
|
unittest
|
|
{
|
|
double[] a = [ 1., 2., ];
|
|
double[] b = [ 4., 3., ];
|
|
// writeln(cosineSimilarity(a, b));
|
|
// writeln(10.0 / sqrt(5.0 * 25));
|
|
assert(approxEqual(
|
|
cosineSimilarity(a, b), 10.0 / sqrt(5.0 * 25),
|
|
0.01));
|
|
}
|
|
|
|
/**
|
|
Normalizes values in $(D range) by multiplying each element with a
|
|
number chosen such that values sum up to $(D sum). If elements in $(D
|
|
range) sum to zero, assigns $(D sum / range.length) to
|
|
all. Normalization makes sense only if all elements in $(D range) are
|
|
positive. $(D normalize) assumes that is the case without checking it.
|
|
|
|
Returns: $(D true) if normalization completed normally, $(D false) if
|
|
all elements in $(D range) were zero or if $(D range) is empty.
|
|
*/
|
|
bool normalize(R)(R range, ElementType!(R) sum = 1) if (isForwardRange!(R))
|
|
{
|
|
ElementType!(R) s = 0;
|
|
// Step 1: Compute sum and length of the range
|
|
static if (hasLength!(R))
|
|
{
|
|
const length = range.length;
|
|
foreach (e; range)
|
|
{
|
|
s += e;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
uint length = 0;
|
|
foreach (e; range)
|
|
{
|
|
s += e;
|
|
++length;
|
|
}
|
|
}
|
|
// Step 2: perform normalization
|
|
if (s == 0)
|
|
{
|
|
if (length)
|
|
{
|
|
auto f = sum / range.length;
|
|
foreach (ref e; range) e = f;
|
|
}
|
|
return false;
|
|
}
|
|
// The path most traveled
|
|
assert(s >= 0);
|
|
auto f = sum / s;
|
|
foreach (ref e; range) e *= f;
|
|
return true;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
double[] a = [];
|
|
assert(!normalize(a));
|
|
a = [ 1., 3. ];
|
|
assert(normalize(a));
|
|
assert(a == [ 0.25, 0.75 ]);
|
|
a = [ 0., 0. ];
|
|
assert(!normalize(a));
|
|
assert(a == [ 0.5, 0.5 ]);
|
|
}
|
|
|
|
/**
|
|
Computes $(LUCKY _entropy) of input range $(D r) in bits. This
|
|
function assumes (without checking) that the values in $(D r) are all
|
|
in $(D [0, 1]). For the entropy to be meaningful, often $(D r) should
|
|
be normalized too (i.e., its values should sum to 1). The
|
|
two-parameter version stops evaluating as soon as the intermediate
|
|
result is greater than or equal to $(D max).
|
|
*/
|
|
ElementType!Range entropy(Range)(Range r) if (isInputRange!Range)
|
|
{
|
|
typeof(return) result = 0.0;
|
|
foreach (e; r)
|
|
{
|
|
if (!e) continue;
|
|
result -= e * log2(e);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/// Ditto
|
|
ElementType!Range entropy(Range, F)(Range r, F max)
|
|
if (isInputRange!Range
|
|
&& !is(CommonType!(ElementType!Range, F) == void))
|
|
{
|
|
typeof(return) result = 0.0;
|
|
foreach (e; r)
|
|
{
|
|
if (!e) continue;
|
|
result -= e * log2(e);
|
|
if (result >= max) break;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
double[] p = [ 0.0, 0, 0, 1 ];
|
|
assert(entropy(p) == 0);
|
|
p = [ 0.25, 0.25, 0.25, 0.25 ];
|
|
assert(entropy(p) == 2);
|
|
assert(entropy(p, 1) == 1);
|
|
}
|
|
|
|
/**
|
|
Computes the $(LUCKY Kullback-Leibler divergence) between input ranges
|
|
$(D a) and $(D b), which is the sum $(D ai * log(ai / bi)). The base
|
|
of logarithm is 2. The ranges are assumed to contain elements in $(D
|
|
[0, 1]). Usually the ranges are normalized probability distributions,
|
|
but this is not required or checked by $(D
|
|
kullbackLeiblerDivergence). If any element of $(D b) is zero, returns
|
|
infinity. If the inputs are normalized, the result is positive.
|
|
*/
|
|
CommonType!(ElementType!Range1, ElementType!Range2)
|
|
kullbackLeiblerDivergence(Range1, Range2)(Range1 a, Range2 b)
|
|
if (isInputRange!(Range1) && isInputRange!(Range2))
|
|
{
|
|
enum bool haveLen = hasLength!(Range1) && hasLength!(Range2);
|
|
static if (haveLen) enforce(a.length == b.length);
|
|
FPTemporary!(typeof(return)) result = 0;
|
|
for (; !a.empty; a.popFront, b.popFront)
|
|
{
|
|
immutable t1 = a.front;
|
|
if (t1 == 0) continue;
|
|
immutable t2 = b.front;
|
|
if (t2 == 0) return result.infinity;
|
|
assert(t1 > 0 && t2 > 0);
|
|
result += t1 * log2(t1 / t2);
|
|
}
|
|
static if (!haveLen) enforce(b.empty);
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
double[] p = [ 0.0, 0, 0, 1 ];
|
|
assert(kullbackLeiblerDivergence(p, p) == 0);
|
|
double[] p1 = [ 0.25, 0.25, 0.25, 0.25 ];
|
|
assert(kullbackLeiblerDivergence(p1, p1) == 0);
|
|
assert(kullbackLeiblerDivergence(p, p1) == 2);
|
|
assert(kullbackLeiblerDivergence(p1, p) == double.infinity);
|
|
double[] p2 = [ 0.2, 0.2, 0.2, 0.4 ];
|
|
assert(approxEqual(kullbackLeiblerDivergence(p1, p2), 0.0719281));
|
|
assert(approxEqual(kullbackLeiblerDivergence(p2, p1), 0.0780719));
|
|
}
|
|
|
|
/**
|
|
Computes the $(LUCKY Jensen-Shannon divergence) between $(D a) and $(D
|
|
b), which is the sum $(D (ai * log(2 * ai / (ai + bi)) + bi * log(2 *
|
|
bi / (ai + bi))) / 2). The base of logarithm is 2. The ranges are
|
|
assumed to contain elements in $(D [0, 1]). Usually the ranges are
|
|
normalized probability distributions, but this is not required or
|
|
checked by $(D jensenShannonDivergence). If the inputs are normalized,
|
|
the result is bounded within $(D [0, 1]). The three-parameter version
|
|
stops evaluations as soon as the intermediate result is greater than
|
|
or equal to $(D limit).
|
|
*/
|
|
CommonType!(ElementType!Range1, ElementType!Range2)
|
|
jensenShannonDivergence(Range1, Range2)(Range1 a, Range2 b)
|
|
if (isInputRange!Range1 && isInputRange!Range2)
|
|
{
|
|
enum bool haveLen = hasLength!(Range1) && hasLength!(Range2);
|
|
static if (haveLen) enforce(a.length == b.length);
|
|
FPTemporary!(typeof(return)) result = 0;
|
|
for (; !a.empty; a.popFront, b.popFront)
|
|
{
|
|
immutable t1 = a.front;
|
|
immutable t2 = b.front;
|
|
immutable avg = (t1 + t2) / 2;
|
|
if (t1 != 0)
|
|
{
|
|
result += t1 * log2(t1 / avg);
|
|
}
|
|
if (t2 != 0)
|
|
{
|
|
result += t2 * log2(t2 / avg);
|
|
}
|
|
}
|
|
static if (!haveLen) enforce(b.empty);
|
|
return result / 2;
|
|
}
|
|
|
|
/// Ditto
|
|
CommonType!(ElementType!Range1, ElementType!Range2)
|
|
jensenShannonDivergence(Range1, Range2, F)(Range1 a, Range2 b, F limit)
|
|
if (isInputRange!Range1 && isInputRange!Range2
|
|
&& is(typeof(CommonType!(ElementType!Range1, ElementType!Range2).init
|
|
>= F.init) : bool))
|
|
{
|
|
enum bool haveLen = hasLength!(Range1) && hasLength!(Range2);
|
|
static if (haveLen) enforce(a.length == b.length);
|
|
FPTemporary!(typeof(return)) result = 0;
|
|
limit *= 2;
|
|
for (; !a.empty; a.popFront, b.popFront)
|
|
{
|
|
immutable t1 = a.front;
|
|
immutable t2 = b.front;
|
|
immutable avg = (t1 + t2) / 2;
|
|
if (t1 != 0)
|
|
{
|
|
result += t1 * log2(t1 / avg);
|
|
}
|
|
if (t2 != 0)
|
|
{
|
|
result += t2 * log2(t2 / avg);
|
|
}
|
|
if (result >= limit) break;
|
|
}
|
|
static if (!haveLen) enforce(b.empty);
|
|
return result / 2;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
double[] p = [ 0.0, 0, 0, 1 ];
|
|
assert(jensenShannonDivergence(p, p) == 0);
|
|
double[] p1 = [ 0.25, 0.25, 0.25, 0.25 ];
|
|
assert(jensenShannonDivergence(p1, p1) == 0);
|
|
assert(approxEqual(jensenShannonDivergence(p1, p), 0.548795));
|
|
double[] p2 = [ 0.2, 0.2, 0.2, 0.4 ];
|
|
assert(approxEqual(jensenShannonDivergence(p1, p2), 0.0186218));
|
|
assert(approxEqual(jensenShannonDivergence(p2, p1), 0.0186218));
|
|
assert(approxEqual(jensenShannonDivergence(p2, p1, 0.005), 0.00602366));
|
|
}
|
|
|
|
// template tabulateFixed(alias fun, uint n,
|
|
// real maxError, real left, real right)
|
|
// {
|
|
// ReturnType!(fun) tabulateFixed(ParameterTypeTuple!(fun) arg)
|
|
// {
|
|
// alias ParameterTypeTuple!(fun)[0] num;
|
|
// static num[n] table;
|
|
// alias arg[0] x;
|
|
// enforce(left <= x && x < right);
|
|
// invariant i = cast(uint) (table.length
|
|
// * ((x - left) / (right - left)));
|
|
// assert(i < n);
|
|
// if (isnan(table[i])) {
|
|
// // initialize it
|
|
// auto x1 = left + i * (right - left) / n;
|
|
// auto x2 = left + (i + 1) * (right - left) / n;
|
|
// invariant y1 = fun(x1), y2 = fun(x2);
|
|
// invariant y = 2 * y1 * y2 / (y1 + y2);
|
|
// num wyda(num xx) { return fun(xx) - y; }
|
|
// auto bestX = findRoot(&wyda, x1, x2);
|
|
// table[i] = fun(bestX);
|
|
// invariant leftError = abs((table[i] - y1) / y1);
|
|
// enforce(leftError <= maxError, text(leftError, " > ", maxError));
|
|
// invariant rightError = abs((table[i] - y2) / y2);
|
|
// enforce(rightError <= maxError, text(rightError, " > ", maxError));
|
|
// }
|
|
// return table[i];
|
|
// }
|
|
// }
|
|
|
|
// unittest
|
|
// {
|
|
// enum epsilon = 0.01;
|
|
// alias tabulateFixed!(tanh, 700, epsilon, 0.2, 3) fasttanh;
|
|
// uint testSize = 100000;
|
|
// auto rnd = Random(unpredictableSeed);
|
|
// foreach (i; 0 .. testSize) {
|
|
// invariant x = uniform(rnd, 0.2F, 3.0F);
|
|
// invariant float y = fasttanh(x), w = tanh(x);
|
|
// invariant e = abs(y - w) / w;
|
|
// //writefln("%.20f", e);
|
|
// enforce(e <= epsilon, text("x = ", x, ", fasttanh(x) = ", y,
|
|
// ", tanh(x) = ", w, ", relerr = ", e));
|
|
// }
|
|
// }
|
|
|
|
/**
|
|
The so-called "all-lengths gap-weighted string kernel" computes a
|
|
similarity measure between $(D s) and $(D t) based on all of their
|
|
common subsequences of all lengths. Gapped subsequences are also
|
|
included.
|
|
|
|
To understand what $(D gapWeightedSimilarity(s, t, lambda)) computes,
|
|
consider first the case $(D lambda = 1) and the strings $(D s =
|
|
["Hello", "brave", "new", "world"]) and $(D t = ["Hello", "new",
|
|
"world"]). In that case, $(D gapWeightedSimilarity) counts the
|
|
following matches:
|
|
|
|
$(OL $(LI three matches of length 1, namely $(D "Hello"), $(D "new"),
|
|
and $(D "world");) $(LI three matches of length 2, namely ($(D
|
|
"Hello", "new")), ($(D "Hello", "world")), and ($(D "new", "world"));)
|
|
$(LI one match of length 3, namely ($(D "Hello", "new", "world")).))
|
|
|
|
The call $(D gapWeightedSimilarity(s, t, 1)) simply counts all of
|
|
these matches and adds them up, returning 7.
|
|
|
|
----
|
|
string[] s = ["Hello", "brave", "new", "world"];
|
|
string[] t = ["Hello", "new", "world"];
|
|
assert(gapWeightedSimilarity(s, t, 1) == 7);
|
|
----
|
|
|
|
Note how the gaps in matching are simply ignored, for example ($(D
|
|
"Hello", "new")) is deemed as good a match as ($(D "new",
|
|
"world")). This may be too permissive for some applications. To
|
|
eliminate gapped matches entirely, use $(D lambda = 0):
|
|
|
|
----
|
|
string[] s = ["Hello", "brave", "new", "world"];
|
|
string[] t = ["Hello", "new", "world"];
|
|
assert(gapWeightedSimilarity(s, t, 0) == 4);
|
|
----
|
|
|
|
The call above eliminated the gapped matches ($(D "Hello", "new")),
|
|
($(D "Hello", "world")), and ($(D "Hello", "new", "world")) from the
|
|
tally. That leaves only 4 matches.
|
|
|
|
The most interesting case is when gapped matches still participate in
|
|
the result, but not as strongly as ungapped matches. The result will
|
|
be a smooth, fine-grained similarity measure between the input
|
|
strings. This is where values of $(D lambda) between 0 and 1 enter
|
|
into play: gapped matches are $(I exponentially penalized with the
|
|
number of gaps) with base $(D lambda). This means that an ungapped
|
|
match adds 1 to the return value; a match with one gap in either
|
|
string adds $(D lambda) to the return value; ...; a match with a total
|
|
of $(D n) gaps in both strings adds $(D pow(lambda, n)) to the return
|
|
value. In the example above, we have 4 matches without gaps, 2 matches
|
|
with one gap, and 1 match with three gaps. The latter match is ($(D
|
|
"Hello", "world")), which has two gaps in the first string and one gap
|
|
in the second string, totaling to three gaps. Summing these up we get
|
|
$(D 4 + 2 * lambda + pow(lambda, 3)).
|
|
|
|
----
|
|
string[] s = ["Hello", "brave", "new", "world"];
|
|
string[] t = ["Hello", "new", "world"];
|
|
assert(gapWeightedSimilarity(s, t, 0.5) == 4 + 0.5 * 2 + 0.125);
|
|
----
|
|
|
|
$(D gapWeightedSimilarity) is useful wherever a smooth similarity
|
|
measure between sequences allowing for approximate matches is
|
|
needed. The examples above are given with words, but any sequences
|
|
with elements comparable for equality are allowed, e.g. characters or
|
|
numbers. $(D gapWeightedSimilarity) uses a highly optimized dynamic
|
|
programming implementation that needs $(D 16 * min(s.length,
|
|
t.length)) extra bytes of memory and $(BIGOH s.length * t.length) time
|
|
to complete.
|
|
*/
|
|
F gapWeightedSimilarity(alias comp = "a == b", R1, R2, F)(R1 s, R2 t, F lambda)
|
|
if (isRandomAccessRange!(R1) && hasLength!(R1)
|
|
&& isRandomAccessRange!(R2) && hasLength!(R2))
|
|
{
|
|
if (s.length < t.length) return gapWeightedSimilarity(t, s, lambda);
|
|
if (!t.length) return 0;
|
|
immutable tl1 = t.length + 1;
|
|
auto dpvi = enforce(cast(F*) malloc(F.sizeof * 2 * t.length));
|
|
auto dpvi1 = dpvi + t.length;
|
|
scope(exit) free(dpvi < dpvi1 ? dpvi : dpvi1);
|
|
dpvi[0 .. t.length] = 0;
|
|
dpvi1[0] = 0;
|
|
immutable lambda2 = lambda * lambda;
|
|
|
|
F result = 0;
|
|
foreach (i; 0 .. s.length)
|
|
{
|
|
const si = s[i];
|
|
for (size_t j = 0;;)
|
|
{
|
|
F dpsij = void;
|
|
if (binaryFun!(comp)(si, t[j]))
|
|
{
|
|
dpsij = 1 + dpvi[j];
|
|
result += dpsij;
|
|
}
|
|
else
|
|
{
|
|
dpsij = 0;
|
|
}
|
|
immutable j1 = j + 1;
|
|
if (j1 == t.length) break;
|
|
dpvi1[j1] = dpsij + lambda * (dpvi1[j] + dpvi[j1])
|
|
- lambda2 * dpvi[j];
|
|
j = j1;
|
|
}
|
|
swap(dpvi, dpvi1);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
string[] s = ["Hello", "brave", "new", "world"];
|
|
string[] t = ["Hello", "new", "world"];
|
|
assert(gapWeightedSimilarity(s, t, 1) == 7);
|
|
assert(gapWeightedSimilarity(s, t, 0) == 4);
|
|
assert(gapWeightedSimilarity(s, t, 0.5) == 4 + 2 * 0.5 + 0.125);
|
|
}
|
|
|
|
/**
|
|
The similarity per $(D gapWeightedSimilarity) has an issue in that it
|
|
grows with the lengths of the two strings, even though the strings are
|
|
not actually very similar. For example, the range $(D ["Hello",
|
|
"world"]) is increasingly similar with the range $(D ["Hello",
|
|
"world", "world", "world",...]) as more instances of $(D "world") are
|
|
appended. To prevent that, $(D gapWeightedSimilarityNormalized)
|
|
computes a normalized version of the similarity that is computed as
|
|
$(D gapWeightedSimilarity(s, t, lambda) /
|
|
sqrt(gapWeightedSimilarity(s, t, lambda) * gapWeightedSimilarity(s, t,
|
|
lambda))). The function $(D gapWeightedSimilarityNormalized) (a
|
|
so-called normalized kernel) is bounded in $(D [0, 1]), reaches $(D 0)
|
|
only for ranges that don't match in any position, and $(D 1) only for
|
|
identical ranges.
|
|
|
|
Example:
|
|
----
|
|
string[] s = ["Hello", "brave", "new", "world"];
|
|
string[] t = ["Hello", "new", "world"];
|
|
assert(gapWeightedSimilarity(s, s, 1) == 15);
|
|
assert(gapWeightedSimilarity(t, t, 1) == 7);
|
|
assert(gapWeightedSimilarity(s, t, 1) == 7);
|
|
assert(gapWeightedSimilarityNormalized(s, t, 1) ==
|
|
7. / sqrt(15. * 7));
|
|
----
|
|
|
|
The optional parameters $(D sSelfSim) and $(D tSelfSim) are meant for
|
|
avoiding duplicate computation. Many applications may have already
|
|
computed $(D gapWeightedSimilarity(s, s, lambda)) and/or $(D
|
|
gapWeightedSimilarity(t, t, lambda)). In that case, they can be passed
|
|
as $(D sSelfSim) and $(D tSelfSim), respectively.
|
|
*/
|
|
Select!(isFloatingPoint!(F), F, double)
|
|
gapWeightedSimilarityNormalized
|
|
(alias comp = "a == b", R1, R2, F)(R1 s, R2 t, F lambda,
|
|
F sSelfSim = F.init, F tSelfSim = F.init)
|
|
if (isRandomAccessRange!(R1) && hasLength!(R1)
|
|
&& isRandomAccessRange!(R2) && hasLength!(R2))
|
|
{
|
|
static bool uncomputed(F n)
|
|
{
|
|
static if (isFloatingPoint!(F)) return isnan(n);
|
|
else return n == n.init;
|
|
}
|
|
if (uncomputed(sSelfSim))
|
|
sSelfSim = gapWeightedSimilarity!(comp)(s, s, lambda);
|
|
if (sSelfSim == 0) return 0;
|
|
if (uncomputed(tSelfSim))
|
|
tSelfSim = gapWeightedSimilarity!(comp)(t, t, lambda);
|
|
if (tSelfSim == 0) return 0;
|
|
return gapWeightedSimilarity!(comp)(s, t, lambda)
|
|
/ sqrt(cast(typeof(return)) sSelfSim * tSelfSim);
|
|
}
|
|
|
|
unittest
|
|
{
|
|
string[] s = ["Hello", "brave", "new", "world"];
|
|
string[] t = ["Hello", "new", "world"];
|
|
assert(gapWeightedSimilarity(s, s, 1) == 15);
|
|
assert(gapWeightedSimilarity(t, t, 1) == 7);
|
|
assert(gapWeightedSimilarity(s, t, 1) == 7);
|
|
assert(approxEqual(gapWeightedSimilarityNormalized(s, t, 1),
|
|
7. / sqrt(15. * 7), 0.01));
|
|
}
|
|
|
|
/**
|
|
Similar to $(D gapWeightedSimilarity), just works in an incremental
|
|
manner by first revealing the matches of length 1, then gapped matches
|
|
of length 2, and so on. The memory requirement is $(BIGOH s.length *
|
|
t.length). The time complexity is $(BIGOH s.length * t.length) time
|
|
for computing each step. Continuing on the previous example:
|
|
|
|
----
|
|
string[] s = ["Hello", "brave", "new", "world"];
|
|
string[] t = ["Hello", "new", "world"];
|
|
auto simIter = gapWeightedSimilarityIncremental(s, t, 1);
|
|
assert(simIter.front == 3); // three 1-length matches
|
|
simIter.popFront;
|
|
assert(simIter.front == 3); // three 2-length matches
|
|
simIter.popFront;
|
|
assert(simIter.front == 1); // one 3-length match
|
|
simIter.popFront;
|
|
assert(simIter.empty); // no more match
|
|
----
|
|
|
|
The implementation is based on the pseudocode in Fig. 4 of the paper
|
|
$(WEB jmlr.csail.mit.edu/papers/volume6/rousu05a/rousu05a.pdf,
|
|
"Efficient Computation of Gapped Substring Kernels on Large Alphabets")
|
|
by Rousu et al., with additional algorithmic and systems-level
|
|
optimizations.
|
|
*/
|
|
struct GapWeightedSimilarityIncremental(Range, F = double)
|
|
if (isRandomAccessRange!(Range) && hasLength!(Range))
|
|
{
|
|
private:
|
|
Range s, t;
|
|
F currentValue = 0;
|
|
F * kl;
|
|
size_t gram = void;
|
|
F lambda = void, lambda2 = void;
|
|
|
|
public:
|
|
/**
|
|
Constructs an object given two ranges $(D s) and $(D t) and a penalty
|
|
$(D lambda). Constructor completes in $(BIGOH s.length * t.length)
|
|
time and computes all matches of length 1.
|
|
*/
|
|
this(Range s, Range t, F lambda) {
|
|
enforce(lambda > 0);
|
|
this.lambda = lambda;
|
|
this.lambda2 = lambda * lambda; // for efficiency only
|
|
|
|
size_t iMin = size_t.max, jMin = size_t.max,
|
|
iMax = 0, jMax = 0;
|
|
/* initialize */
|
|
Tuple!(uint, uint) * k0;
|
|
size_t k0len;
|
|
scope(exit) free(k0);
|
|
currentValue = 0;
|
|
foreach (i, si; s) {
|
|
foreach (j; 0 .. t.length) {
|
|
if (si != t[j]) continue;
|
|
k0 = cast(typeof(k0))
|
|
realloc(k0, ++k0len * (*k0).sizeof);
|
|
with (k0[k0len - 1]) {
|
|
field[0] = i;
|
|
field[1] = j;
|
|
}
|
|
// Maintain the minimum and maximum i and j
|
|
if (iMin > i) iMin = i;
|
|
if (iMax < i) iMax = i;
|
|
if (jMin > j) jMin = j;
|
|
if (jMax < j) jMax = j;
|
|
}
|
|
}
|
|
|
|
if (iMin > iMax) return;
|
|
assert(k0len);
|
|
|
|
currentValue = k0len;
|
|
// Chop strings down to the useful sizes
|
|
s = s[iMin .. iMax + 1];
|
|
t = t[jMin .. jMax + 1];
|
|
this.s = s;
|
|
this.t = t;
|
|
|
|
// Si = errnoEnforce(cast(F *) malloc(t.length * F.sizeof));
|
|
kl = errnoEnforce(cast(F *) malloc(s.length * t.length * F.sizeof));
|
|
|
|
kl[0 .. s.length * t.length] = 0;
|
|
foreach (pos; 0 .. k0len) {
|
|
with (k0[pos]) {
|
|
kl[(field[0] - iMin) * t.length + field[1] -jMin] = lambda2;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
Returns $(D this).
|
|
*/
|
|
ref GapWeightedSimilarityIncremental opSlice()
|
|
{
|
|
return this;
|
|
}
|
|
|
|
/**
|
|
Computes the match of the popFront length. Completes in $(BIGOH s.length *
|
|
t.length) time.
|
|
*/
|
|
void popFront() {
|
|
// This is a large source of optimization: if similarity at
|
|
// the gram-1 level was 0, then we can safely assume
|
|
// similarity at the gram level is 0 as well.
|
|
if (empty) return;
|
|
|
|
// Now attempt to match gapped substrings of length `gram'
|
|
++gram;
|
|
currentValue = 0;
|
|
|
|
auto Si = cast(F*) alloca(t.length * F.sizeof);
|
|
Si[0 .. t.length] = 0;
|
|
foreach (i; 0 .. s.length)
|
|
{
|
|
const si = s[i];
|
|
F Sij_1 = 0;
|
|
F Si_1j_1 = 0;
|
|
auto kli = kl + i * t.length;
|
|
for (size_t j = 0;;)
|
|
{
|
|
const klij = kli[j];
|
|
const Si_1j = Si[j];
|
|
const tmp = klij + lambda * (Si_1j + Sij_1) - lambda2 * Si_1j_1;
|
|
// now update kl and currentValue
|
|
if (si == t[j])
|
|
currentValue += kli[j] = lambda2 * Si_1j_1;
|
|
else
|
|
kli[j] = 0;
|
|
// commit to Si
|
|
Si[j] = tmp;
|
|
if (++j == t.length) break;
|
|
// get ready for the popFront step; virtually increment j,
|
|
// so essentially stuffj_1 <-- stuffj
|
|
Si_1j_1 = Si_1j;
|
|
Sij_1 = tmp;
|
|
}
|
|
}
|
|
currentValue /= pow(lambda, 2 * (gram + 1));
|
|
|
|
version (none)
|
|
{
|
|
Si_1[0 .. t.length] = 0;
|
|
kl[0 .. min(t.length, maxPerimeter + 1)] = 0;
|
|
foreach (i; 1 .. min(s.length, maxPerimeter + 1)) {
|
|
auto kli = kl + i * t.length;
|
|
assert(s.length > i);
|
|
const si = s[i];
|
|
auto kl_1i_1 = kl_1 + (i - 1) * t.length;
|
|
kli[0] = 0;
|
|
F lastS = 0;
|
|
foreach (j; 1 .. min(maxPerimeter - i + 1, t.length)) {
|
|
immutable j_1 = j - 1;
|
|
immutable tmp = kl_1i_1[j_1]
|
|
+ lambda * (Si_1[j] + lastS)
|
|
- lambda2 * Si_1[j_1];
|
|
kl_1i_1[j_1] = float.nan;
|
|
Si_1[j_1] = lastS;
|
|
lastS = tmp;
|
|
if (si == t[j]) {
|
|
currentValue += kli[j] = lambda2 * lastS;
|
|
} else {
|
|
kli[j] = 0;
|
|
}
|
|
}
|
|
Si_1[t.length - 1] = lastS;
|
|
}
|
|
currentValue /= pow(lambda, 2 * (gram + 1));
|
|
// get ready for the popFront computation
|
|
swap(kl, kl_1);
|
|
}
|
|
}
|
|
|
|
/**
|
|
Returns the gapped similarity at the current match length (initially
|
|
1, grows with each call to $(D popFront)).
|
|
*/
|
|
F front() { return currentValue; }
|
|
|
|
/**
|
|
Returns whether there are more matches.
|
|
*/
|
|
bool empty() {
|
|
if (currentValue) return false;
|
|
if (kl) {
|
|
free(kl);
|
|
kl = null;
|
|
}
|
|
return true;
|
|
}
|
|
}
|
|
|
|
/**
|
|
Ditto
|
|
*/
|
|
GapWeightedSimilarityIncremental!(R, F) gapWeightedSimilarityIncremental(R, F)
|
|
(R r1, R r2, F penalty)
|
|
{
|
|
return typeof(return)(r1, r2, penalty);
|
|
}
|
|
|
|
unittest
|
|
{
|
|
string[] s = ["Hello", "brave", "new", "world"];
|
|
string[] t = ["Hello", "new", "world"];
|
|
auto simIter = gapWeightedSimilarityIncremental(s, t, 1.0);
|
|
//foreach (e; simIter) writeln(e);
|
|
assert(simIter.front == 3); // three 1-length matches
|
|
simIter.popFront;
|
|
assert(simIter.front == 3, text(simIter.front)); // three 2-length matches
|
|
simIter.popFront;
|
|
assert(simIter.front == 1); // one 3-length matches
|
|
simIter.popFront;
|
|
assert(simIter.empty); // no more match
|
|
|
|
s = ["Hello"];
|
|
t = ["bye"];
|
|
simIter = gapWeightedSimilarityIncremental(s, t, 0.5);
|
|
assert(simIter.empty);
|
|
|
|
s = ["Hello"];
|
|
t = ["Hello"];
|
|
simIter = gapWeightedSimilarityIncremental(s, t, 0.5);
|
|
assert(simIter.front == 1); // one match
|
|
simIter.popFront;
|
|
assert(simIter.empty);
|
|
|
|
s = ["Hello", "world"];
|
|
t = ["Hello"];
|
|
simIter = gapWeightedSimilarityIncremental(s, t, 0.5);
|
|
assert(simIter.front == 1); // one match
|
|
simIter.popFront;
|
|
assert(simIter.empty);
|
|
|
|
s = ["Hello", "world"];
|
|
t = ["Hello", "yah", "world"];
|
|
simIter = gapWeightedSimilarityIncremental(s, t, 0.5);
|
|
assert(simIter.front == 2); // two 1-gram matches
|
|
simIter.popFront;
|
|
assert(simIter.front == 0.5, text(simIter.front)); // one 2-gram match, 1 gap
|
|
}
|
|
|
|
unittest
|
|
{
|
|
GapWeightedSimilarityIncremental!(string[]) sim =
|
|
GapWeightedSimilarityIncremental!(string[])(
|
|
["nyuk", "I", "have", "no", "chocolate", "giba"],
|
|
["wyda", "I", "have", "I", "have", "have", "I", "have", "hehe"],
|
|
0.5);
|
|
double witness[] = [ 7., 4.03125, 0, 0 ];
|
|
foreach (e; sim)
|
|
{
|
|
//writeln(e);
|
|
assert(e == witness.front);
|
|
witness.popFront;
|
|
}
|
|
witness = [ 3., 1.3125, 0.25 ];
|
|
sim = GapWeightedSimilarityIncremental!(string[])(
|
|
["I", "have", "no", "chocolate"],
|
|
["I", "have", "some", "chocolate"],
|
|
0.5);
|
|
foreach (e; sim)
|
|
{
|
|
//writeln(e);
|
|
assert(e == witness.front);
|
|
witness.popFront;
|
|
}
|
|
assert(witness.empty);
|
|
}
|
|
|
|
/*
|
|
* Copyright (C) 2004-2009 by Digital Mars, www.digitalmars.com
|
|
* Written by Andrei Alexandrescu, www.erdani.org
|
|
*
|
|
* This software is provided 'as-is', without any express or implied
|
|
* warranty. In no event will the authors be held liable for any damages
|
|
* arising from the use of this software.
|
|
*
|
|
* Permission is granted to anyone to use this software for any purpose,
|
|
* including commercial applications, and to alter it and redistribute it
|
|
* freely, subject to the following restrictions:
|
|
*
|
|
* o The origin of this software must not be misrepresented; you must not
|
|
* claim that you wrote the original software. If you use this software
|
|
* in a product, an acknowledgment in the product documentation would be
|
|
* appreciated but is not required.
|
|
* o Altered source versions must be plainly marked as such, and must not
|
|
* be misrepresented as being the original software.
|
|
* o This notice may not be removed or altered from any source
|
|
* distribution.
|
|
*/
|
|
/+
|
|
/**
|
|
Primes generator
|
|
*/
|
|
struct Primes(UIntType)
|
|
{
|
|
private UIntType[] found = [ 2 ];
|
|
|
|
UIntType front() { return found[$ - 1]; }
|
|
|
|
void popFront()
|
|
{
|
|
outer:
|
|
for (UIntType candidate = front + 1 + (front != 2); ; candidate += 2)
|
|
{
|
|
UIntType stop = cast(uint) sqrt(cast(double) candidate);
|
|
foreach (e; found)
|
|
{
|
|
if (e > stop) break;
|
|
if (candidate % e == 0) continue outer;
|
|
}
|
|
// found!
|
|
found ~= candidate;
|
|
break;
|
|
}
|
|
}
|
|
|
|
enum bool empty = false;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
foreach (e; take(10, Primes!(uint)())) writeln(e);
|
|
}
|
|
+/
|