Add documentation and unittests regarding multisets

This commit is contained in:
RazvanN7 2017-07-21 09:19:02 +03:00
parent a2a96e9e0f
commit dc648c5b38
2 changed files with 124 additions and 7 deletions

View file

@ -3,6 +3,14 @@
This is a submodule of $(MREF std, algorithm).
It contains generic algorithms that implement set operations.
The functions $(LREF multiwayMerge), $(LREF multiwayUnion), $(LREF setDifference),
$(LREF setIntersection), $(LREF setSymmetricDifference) expect sorted ranges as
input; $(LREF multiwayMerge) and $(LREF multiwayUnion) take them as a single
range of sorted ranges.
All algorithms are generalized to accept as input not only sets but also
$(HTTP https://en.wikipedia.org/wiki/Multiset, multisets). Each algorithm
documents behaviour in the presence of duplicated inputs.
$(SCRIPT inhibitQuickIndex = 1;)
$(BOOKTABLE Cheat Sheet,
$(TR $(TH Function Name) $(TH Description))
@ -14,8 +22,9 @@ $(T2 largestPartialIntersectionWeighted,
Copies out the values that occur most frequently (multiplied by
per-value weights) in a range of ranges.)
$(T2 multiwayMerge,
Computes the union of a set of sets implemented as a range of sorted
ranges.)
Merges a range of sorted ranges.)
$(T2 multiwayUnion,
Computes the union of a range of sorted ranges.)
$(T2 setDifference,
Lazily computes the set difference of two or more sorted ranges.)
$(T2 setIntersection,
@ -568,6 +577,11 @@ array of the occurrences and then selecting its top items, and also
requires less memory ($(D largestPartialIntersection) builds its
result directly in $(D tgt) and requires no extra memory).
If at least one of the ranges is a multiset, then all occurrences
of a duplicate element are taken into account. The result is
equivalent to merging all ranges and picking the most frequent
$(D tgt.length) elements.
Warning: Because $(D largestPartialIntersection) does not allocate
extra memory, it will leave $(D ror) modified. Namely, $(D
largestPartialIntersection) assumes ownership of $(D ror) and
@ -616,6 +630,22 @@ void largestPartialIntersection
largestPartialIntersection(a, c);
assert(c[0] == tuple(1.0, 3u));
// 1.0 occurs in 3 inputs
// multiset
double[][] x =
[
[1, 1, 1, 1, 4, 7, 8],
[1, 7],
[1, 7, 8],
[4, 7],
[7]
];
auto y = new Tuple!(double, uint)[2];
largestPartialIntersection(x.dup, y);
// 7.0 occurs 5 times
assert(y[0] == tuple(7.0, 5u));
// 1.0 occurs 6 times
assert(y[1] == tuple(1.0, 6u));
}
import std.algorithm.sorting : SortOutput; // FIXME
@ -625,6 +655,11 @@ import std.algorithm.sorting : SortOutput; // FIXME
Similar to $(D largestPartialIntersection), but associates a weight
with each distinct element in the intersection.
If at least one of the ranges is a multiset, then all occurrences
of a duplicate element are taken into account. The result
is equivalent to merging all input ranges and picking the
$(D tgt.length) elements with the highest weight-based ranking.
Params:
less = The predicate the ranges are sorted by.
ror = A range of $(REF_ALTTEXT forward ranges, isForwardRange, std,range,primitives)
@ -672,6 +707,20 @@ void largestPartialIntersectionWeighted
assert(b[0] == tuple(4.0, 2u));
// 4.0 occurs 2 times -> 4.6 (2 * 2.3)
// 7.0 occurs 3 times -> 4.4 (3 * 1.1)
// multiset
double[][] x =
[
[ 1, 1, 1, 4, 7, 8 ],
[ 1, 7 ],
[ 1, 7, 8],
[ 4 ],
[ 7 ],
];
auto y = new Tuple!(double, uint)[1];
largestPartialIntersectionWeighted(x, y, weights);
assert(y[0] == tuple(1.0, 5u));
// 1.0 occurs 5 times -> 1.2 * 5 = 6
}
@system unittest
@ -746,7 +795,7 @@ void largestPartialIntersectionWeighted
// MultiwayMerge
/**
Computes the union of multiple sets. The input sets are passed as a
Merges multiple sets. The input sets are passed as a
range of ranges and each is assumed to be sorted by $(D
less). Computation is done lazily, one union element at a time. The
complexity of one $(D popFront) operation is $(BIGOH
@ -759,6 +808,10 @@ MultiwayMerge) is $(BIGOH n * ror.length * log(ror.length)), i.e., $(D
log(ror.length)) times worse than just spanning all ranges in
turn. The output comes sorted (unstably) by $(D less).
The length of the resulting range is the sum of all lengths of
the ranges passed as input. This means that all elements (duplicates
included) are transferred to the resulting range.
For backward compatibility, `multiwayMerge` is available under
the name `nWayUnion` and `MultiwayMerge` under the name `NWayUnion`.
Future code should use `multiwayMerge` and `MultiwayMerge` as `nWayUnion`
@ -859,6 +912,18 @@ MultiwayMerge!(less, RangeOfRanges) multiwayMerge
1, 1, 1, 4, 4, 7, 7, 7, 7, 8, 8
];
assert(equal(multiwayMerge(a), witness));
double[][] b =
[
// range with duplicates
[ 1, 1, 4, 7, 8 ],
[ 7 ],
[ 1, 7, 8],
[ 4 ],
[ 7 ],
];
// duplicates are propagated to the resulting range
assert(equal(multiwayMerge(b), witness));
}
alias nWayUnion = multiwayMerge;
@ -870,14 +935,16 @@ as a range of ranges and each is assumed to be sorted by $(D
less). Computation is done lazily, one union element at a time.
`multiwayUnion(ror)` is functionally equivalent to `multiwayMerge(ror).uniq`.
The output of `multiwayUnion` has no duplicates even when its inputs contain duplicates.
Params:
less = Predicate the given ranges are sorted by.
ror = A range of ranges sorted by `less` to compute the union for.
Returns:
A range of the intersection of the ranges in `ror`.
A range of the union of the ranges in `ror`.
See also: $(LREF NWayUnion)
See also: $(LREF multiwayMerge)
*/
auto multiwayUnion(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror)
{
@ -890,6 +957,7 @@ auto multiwayUnion(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror)
{
import std.algorithm.comparison : equal;
// sets
double[][] a =
[
[ 1, 4, 7, 8 ],
@ -901,6 +969,17 @@ auto multiwayUnion(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror)
auto witness = [1, 4, 7, 8];
assert(equal(multiwayUnion(a), witness));
// multisets
double[][] b =
[
[ 1, 1, 1, 4, 7, 8 ],
[ 1, 7 ],
[ 1, 7, 7, 8],
[ 4 ],
[ 7 ],
];
assert(equal(multiwayUnion(b), witness));
}
/**
@ -908,6 +987,11 @@ Lazily computes the difference of $(D r1) and $(D r2). The two ranges
are assumed to be sorted by $(D less). The element types of the two
ranges must have a common type.
In the case of multisets, considering that element `a` appears `x`
times in $(D r1) and `y` times in $(D r2), the number of occurrences
of `a` in the resulting range is going to be `x-y` if `x > y` or `0` otherwise.
Params:
less = Predicate the given ranges are sorted by.
r1 = The first range.
@ -997,10 +1081,18 @@ SetDifference!(less, R1, R2) setDifference(alias less = "a < b", R1, R2)
import std.algorithm.comparison : equal;
import std.range.primitives : isForwardRange;
//sets
int[] a = [ 1, 2, 4, 5, 7, 9 ];
int[] b = [ 0, 1, 2, 4, 7, 8 ];
assert(equal(setDifference(a, b), [5, 9][]));
assert(equal(setDifference(a, b), [5, 9]));
static assert(isForwardRange!(typeof(setDifference(a, b))));
// multisets
int[] x = [1, 1, 1, 2, 3];
int[] y = [1, 1, 2, 4, 5];
auto r = setDifference(x, y);
assert(equal(r, [1, 3]));
assert(setDifference(r, x).empty);
}
@safe unittest // Issue 10460
@ -1019,6 +1111,10 @@ Lazily computes the intersection of two or more input ranges $(D
ranges). The ranges are assumed to be sorted by $(D less). The element
types of the ranges must have a common type.
In the case of multisets, the range with the minimum number of
occurrences of a given element propagates the number of
occurrences of this element to the resulting range.
Params:
less = Predicate the given ranges are sorted by.
ranges = The ranges to compute the intersection for.
@ -1132,12 +1228,19 @@ if (Rs.length >= 2 && allSatisfy!(isInputRange, Rs) &&
{
import std.algorithm.comparison : equal;
// sets
int[] a = [ 1, 2, 4, 5, 7, 9 ];
int[] b = [ 0, 1, 2, 4, 7, 8 ];
int[] c = [ 0, 1, 4, 5, 7, 8 ];
assert(equal(setIntersection(a, a), a));
assert(equal(setIntersection(a, b), [1, 2, 4, 7]));
assert(equal(setIntersection(a, b, c), [1, 4, 7]));
// multisets
int[] d = [ 1, 1, 2, 2, 7, 7 ];
int[] e = [ 1, 1, 1, 7];
assert(equal(setIntersection(a, d), [1, 2, 7]));
assert(equal(setIntersection(d, e), [1, 1, 7]));
}
@safe unittest
@ -1177,6 +1280,12 @@ r2). The two ranges are assumed to be sorted by $(D less), and the
output is also sorted by $(D less). The element types of the two
ranges must have a common type.
If both ranges are sets (without duplicated elements), the resulting
range is going to be a set. If at least one of the ranges is a multiset,
the number of occurrences of an element `x` in the resulting range is `abs(a-b)`
where `a` is the number of occurrences of `x` in $(D r1), `b` is the number of
occurrences of `x` in $(D r2), and `abs` is the absolute value.
If both arguments are ranges of L-values of the same type then
$(D SetSymmetricDifference) will also be a range of L-values of
that type.
@ -1288,10 +1397,17 @@ setSymmetricDifference(alias less = "a < b", R1, R2)
import std.algorithm.comparison : equal;
import std.range.primitives : isForwardRange;
// sets
int[] a = [ 1, 2, 4, 5, 7, 9 ];
int[] b = [ 0, 1, 2, 4, 7, 8 ];
assert(equal(setSymmetricDifference(a, b), [0, 5, 8, 9][]));
static assert(isForwardRange!(typeof(setSymmetricDifference(a, b))));
// multisets
int[] c = [1, 1, 1, 1, 2, 2, 2, 4, 5, 6];
int[] d = [1, 1, 2, 2, 2, 2, 4, 7, 9];
assert(equal(setSymmetricDifference(c, d), setSymmetricDifference(d, c)));
assert(equal(setSymmetricDifference(c, d), [1, 1, 2, 5, 6, 7, 9]));
}
@safe unittest // Issue 10460