mirror of
https://github.com/dlang/phobos.git
synced 2025-04-29 06:30:28 +03:00
Add documentation and unittests regarding multisets
This commit is contained in:
parent
a2a96e9e0f
commit
dc648c5b38
2 changed files with 124 additions and 7 deletions
|
@ -3,6 +3,14 @@
|
|||
This is a submodule of $(MREF std, algorithm).
|
||||
It contains generic algorithms that implement set operations.
|
||||
|
||||
The functions $(LREF multiwayMerge), $(LREF multiwayUnion), $(LREF setDifference),
|
||||
$(LREF setIntersection), $(LREF setSymmetricDifference) expect a range of sorted
|
||||
ranges as input.
|
||||
|
||||
All algorithms are generalized to accept as input not only sets but also
|
||||
$(HTTP https://en.wikipedia.org/wiki/Multiset, multisets). Each algorithm
|
||||
documents behaviour in the presence of duplicated inputs.
|
||||
|
||||
$(SCRIPT inhibitQuickIndex = 1;)
|
||||
$(BOOKTABLE Cheat Sheet,
|
||||
$(TR $(TH Function Name) $(TH Description))
|
||||
|
@ -14,8 +22,9 @@ $(T2 largestPartialIntersectionWeighted,
|
|||
Copies out the values that occur most frequently (multiplied by
|
||||
per-value weights) in a range of ranges.)
|
||||
$(T2 multiwayMerge,
|
||||
Computes the union of a set of sets implemented as a range of sorted
|
||||
ranges.)
|
||||
Merges a range of sorted ranges.)
|
||||
$(T2 multiwayUnion,
|
||||
Computes the union of a range of sorted ranges.)
|
||||
$(T2 setDifference,
|
||||
Lazily computes the set difference of two or more sorted ranges.)
|
||||
$(T2 setIntersection,
|
||||
|
@ -568,6 +577,11 @@ array of the occurrences and then selecting its top items, and also
|
|||
requires less memory ($(D largestPartialIntersection) builds its
|
||||
result directly in $(D tgt) and requires no extra memory).
|
||||
|
||||
If at least one of the ranges is a multiset, then all occurrences
|
||||
of a duplicate element are taken into account. The result is
|
||||
equivalent to merging all ranges and picking the most frequent
|
||||
$(D tgt.length) elements.
|
||||
|
||||
Warning: Because $(D largestPartialIntersection) does not allocate
|
||||
extra memory, it will leave $(D ror) modified. Namely, $(D
|
||||
largestPartialIntersection) assumes ownership of $(D ror) and
|
||||
|
@ -616,6 +630,22 @@ void largestPartialIntersection
|
|||
largestPartialIntersection(a, c);
|
||||
assert(c[0] == tuple(1.0, 3u));
|
||||
// 1.0 occurs in 3 inputs
|
||||
|
||||
// multiset
|
||||
double[][] x =
|
||||
[
|
||||
[1, 1, 1, 1, 4, 7, 8],
|
||||
[1, 7],
|
||||
[1, 7, 8],
|
||||
[4, 7],
|
||||
[7]
|
||||
];
|
||||
auto y = new Tuple!(double, uint)[2];
|
||||
largestPartialIntersection(x.dup, y);
|
||||
// 7.0 occurs 5 times
|
||||
assert(y[0] == tuple(7.0, 5u));
|
||||
// 1.0 occurs 6 times
|
||||
assert(y[1] == tuple(1.0, 6u));
|
||||
}
|
||||
|
||||
import std.algorithm.sorting : SortOutput; // FIXME
|
||||
|
@ -625,6 +655,11 @@ import std.algorithm.sorting : SortOutput; // FIXME
|
|||
Similar to $(D largestPartialIntersection), but associates a weight
|
||||
with each distinct element in the intersection.
|
||||
|
||||
If at least one of the ranges is a multiset, then all occurrences
|
||||
of a duplicate element are taken into account. The result
|
||||
is equivalent to merging all input ranges and picking the highest
|
||||
$(D tgt.length) elements ranked by weight.
|
||||
|
||||
Params:
|
||||
less = The predicate the ranges are sorted by.
|
||||
ror = A range of $(REF_ALTTEXT forward ranges, isForwardRange, std,range,primitives)
|
||||
|
@ -672,6 +707,20 @@ void largestPartialIntersectionWeighted
|
|||
assert(b[0] == tuple(4.0, 2u));
|
||||
// 4.0 occurs 2 times -> 4.6 (2 * 2.3)
|
||||
// 7.0 occurs 3 times -> 4.4 (3 * 1.1)
|
||||
|
||||
// multiset
|
||||
double[][] x =
|
||||
[
|
||||
[ 1, 1, 1, 4, 7, 8 ],
|
||||
[ 1, 7 ],
|
||||
[ 1, 7, 8],
|
||||
[ 4 ],
|
||||
[ 7 ],
|
||||
];
|
||||
auto y = new Tuple!(double, uint)[1];
|
||||
largestPartialIntersectionWeighted(x, y, weights);
|
||||
assert(y[0] == tuple(1.0, 5u));
|
||||
// 1.0 occurs 5 times -> 1.2 * 5 = 6
|
||||
}
|
||||
|
||||
@system unittest
|
||||
|
@ -746,7 +795,7 @@ void largestPartialIntersectionWeighted
|
|||
|
||||
// MultiwayMerge
|
||||
/**
|
||||
Computes the union of multiple sets. The input sets are passed as a
|
||||
Merges multiple sets. The input sets are passed as a
|
||||
range of ranges and each is assumed to be sorted by $(D
|
||||
less). Computation is done lazily, one union element at a time. The
|
||||
complexity of one $(D popFront) operation is $(BIGOH
|
||||
|
@ -759,6 +808,10 @@ MultiwayMerge) is $(BIGOH n * ror.length * log(ror.length)), i.e., $(D
|
|||
log(ror.length)) times worse than just spanning all ranges in
|
||||
turn. The output comes sorted (unstably) by $(D less).
|
||||
|
||||
The length of the resulting range is the sum of all lengths of
|
||||
the ranges passed as input. This means that all elements (duplicates
|
||||
included) are transferred to the resulting range.
|
||||
|
||||
For backward compatibility, `multiwayMerge` is available under
|
||||
the name `nWayUnion` and `MultiwayMerge` under the name of `NWayUnion`.
|
||||
Future code should use `multiwayMerge` and `MultiwayMerge` as `nWayUnion`
|
||||
|
@ -859,6 +912,18 @@ MultiwayMerge!(less, RangeOfRanges) multiwayMerge
|
|||
1, 1, 1, 4, 4, 7, 7, 7, 7, 8, 8
|
||||
];
|
||||
assert(equal(multiwayMerge(a), witness));
|
||||
|
||||
double[][] b =
|
||||
[
|
||||
// range with duplicates
|
||||
[ 1, 1, 4, 7, 8 ],
|
||||
[ 7 ],
|
||||
[ 1, 7, 8],
|
||||
[ 4 ],
|
||||
[ 7 ],
|
||||
];
|
||||
// duplicates are propagated to the resulting range
|
||||
assert(equal(multiwayMerge(b), witness));
|
||||
}
|
||||
|
||||
alias nWayUnion = multiwayMerge;
|
||||
|
@ -870,14 +935,16 @@ as a range of ranges and each is assumed to be sorted by $(D
|
|||
less). Computation is done lazily, one union element at a time.
|
||||
`multiwayUnion(ror)` is functionally equivalent to `multiwayMerge(ror).uniq`.
|
||||
|
||||
The output of `multiwayUnion` has no duplicates even when its inputs contain duplicates.
|
||||
|
||||
Params:
|
||||
less = Predicate the given ranges are sorted by.
|
||||
ror = A range of ranges sorted by `less` to compute the union for.
|
||||
|
||||
Returns:
|
||||
A range of the intersection of the ranges in `ror`.
|
||||
A range of the union of the ranges in `ror`.
|
||||
|
||||
See also: $(LREF NWayUnion)
|
||||
See also: $(LREF multiwayMerge)
|
||||
*/
|
||||
auto multiwayUnion(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror)
|
||||
{
|
||||
|
@ -890,6 +957,7 @@ auto multiwayUnion(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror)
|
|||
{
|
||||
import std.algorithm.comparison : equal;
|
||||
|
||||
// sets
|
||||
double[][] a =
|
||||
[
|
||||
[ 1, 4, 7, 8 ],
|
||||
|
@ -901,6 +969,17 @@ auto multiwayUnion(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror)
|
|||
|
||||
auto witness = [1, 4, 7, 8];
|
||||
assert(equal(multiwayUnion(a), witness));
|
||||
|
||||
// multisets
|
||||
double[][] b =
|
||||
[
|
||||
[ 1, 1, 1, 4, 7, 8 ],
|
||||
[ 1, 7 ],
|
||||
[ 1, 7, 7, 8],
|
||||
[ 4 ],
|
||||
[ 7 ],
|
||||
];
|
||||
assert(equal(multiwayUnion(b), witness));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -908,6 +987,11 @@ Lazily computes the difference of $(D r1) and $(D r2). The two ranges
|
|||
are assumed to be sorted by $(D less). The element types of the two
|
||||
ranges must have a common type.
|
||||
|
||||
|
||||
In the case of multisets, considering that element `a` appears `x`
|
||||
times in $(D r1) and `y` times in $(D r2), the number of occurrences
|
||||
of `a` in the resulting range is going to be `x-y` if `x > y` or 0 otherwise.
|
||||
|
||||
Params:
|
||||
less = Predicate the given ranges are sorted by.
|
||||
r1 = The first range.
|
||||
|
@ -997,10 +1081,18 @@ SetDifference!(less, R1, R2) setDifference(alias less = "a < b", R1, R2)
|
|||
import std.algorithm.comparison : equal;
|
||||
import std.range.primitives : isForwardRange;
|
||||
|
||||
//sets
|
||||
int[] a = [ 1, 2, 4, 5, 7, 9 ];
|
||||
int[] b = [ 0, 1, 2, 4, 7, 8 ];
|
||||
assert(equal(setDifference(a, b), [5, 9][]));
|
||||
assert(equal(setDifference(a, b), [5, 9]));
|
||||
static assert(isForwardRange!(typeof(setDifference(a, b))));
|
||||
|
||||
// multisets
|
||||
int[] x = [1, 1, 1, 2, 3];
|
||||
int[] y = [1, 1, 2, 4, 5];
|
||||
auto r = setDifference(x, y);
|
||||
assert(equal(r, [1, 3]));
|
||||
assert(setDifference(r, x).empty);
|
||||
}
|
||||
|
||||
@safe unittest // Issue 10460
|
||||
|
@ -1019,6 +1111,10 @@ Lazily computes the intersection of two or more input ranges $(D
|
|||
ranges). The ranges are assumed to be sorted by $(D less). The element
|
||||
types of the ranges must have a common type.
|
||||
|
||||
In the case of multisets, the range with the minimum number of
|
||||
occurrences of a given element propagates the number of
|
||||
occurrences of this element to the resulting range.
|
||||
|
||||
Params:
|
||||
less = Predicate the given ranges are sorted by.
|
||||
ranges = The ranges to compute the intersection for.
|
||||
|
@ -1132,12 +1228,19 @@ if (Rs.length >= 2 && allSatisfy!(isInputRange, Rs) &&
|
|||
{
|
||||
import std.algorithm.comparison : equal;
|
||||
|
||||
// sets
|
||||
int[] a = [ 1, 2, 4, 5, 7, 9 ];
|
||||
int[] b = [ 0, 1, 2, 4, 7, 8 ];
|
||||
int[] c = [ 0, 1, 4, 5, 7, 8 ];
|
||||
assert(equal(setIntersection(a, a), a));
|
||||
assert(equal(setIntersection(a, b), [1, 2, 4, 7]));
|
||||
assert(equal(setIntersection(a, b, c), [1, 4, 7]));
|
||||
|
||||
// multisets
|
||||
int[] d = [ 1, 1, 2, 2, 7, 7 ];
|
||||
int[] e = [ 1, 1, 1, 7];
|
||||
assert(equal(setIntersection(a, d), [1, 2, 7]));
|
||||
assert(equal(setIntersection(d, e), [1, 1, 7]));
|
||||
}
|
||||
|
||||
@safe unittest
|
||||
|
@ -1177,6 +1280,12 @@ r2). The two ranges are assumed to be sorted by $(D less), and the
|
|||
output is also sorted by $(D less). The element types of the two
|
||||
ranges must have a common type.
|
||||
|
||||
If both ranges are sets (without duplicated elements), the resulting
|
||||
range is going to be a set. If at least one of the ranges is a multiset,
|
||||
the number of occurrences of an element `x` in the resulting range is `abs(a-b)`
|
||||
where `a` is the number of occurrences of `x` in $(D r1), `b` is the number of
|
||||
occurrences of `x` in $(D r2), and `abs` is the absolute value.
|
||||
|
||||
If both arguments are ranges of L-values of the same type then
|
||||
$(D SetSymmetricDifference) will also be a range of L-values of
|
||||
that type.
|
||||
|
@ -1288,10 +1397,17 @@ setSymmetricDifference(alias less = "a < b", R1, R2)
|
|||
import std.algorithm.comparison : equal;
|
||||
import std.range.primitives : isForwardRange;
|
||||
|
||||
// sets
|
||||
int[] a = [ 1, 2, 4, 5, 7, 9 ];
|
||||
int[] b = [ 0, 1, 2, 4, 7, 8 ];
|
||||
assert(equal(setSymmetricDifference(a, b), [0, 5, 8, 9][]));
|
||||
static assert(isForwardRange!(typeof(setSymmetricDifference(a, b))));
|
||||
|
||||
// multisets
|
||||
int[] c = [1, 1, 1, 1, 2, 2, 2, 4, 5, 6];
|
||||
int[] d = [1, 1, 2, 2, 2, 2, 4, 7, 9];
|
||||
assert(equal(setSymmetricDifference(c, d), setSymmetricDifference(d, c)));
|
||||
assert(equal(setSymmetricDifference(c, d), [1, 1, 2, 5, 6, 7, 9]));
|
||||
}
|
||||
|
||||
@safe unittest // Issue 10460
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue