Merge pull request #3005 from Poita/Issue14183

Fix Issue 14183 - Changes to groupBy
This commit is contained in:
H. S. Teoh 2015-03-19 13:04:07 -07:00 committed by Martin Nowak
parent 99956f9060
commit afd30b41b0
3 changed files with 136 additions and 72 deletions

View file

@ -7,13 +7,15 @@ $(BOOKTABLE Cheat Sheet,
$(TR $(TH Function Name) $(TH Description)) $(TR $(TH Function Name) $(TH Description))
$(T2 aggregate,
$(D [[3, 1, 5], [2, 6, 4]].aggregate!max) returns a range containing
the elements $(D 5) and $(D 6).)
$(T2 cache, $(T2 cache,
Eagerly evaluates and caches another range's $(D front).) Eagerly evaluates and caches another range's $(D front).)
$(T2 cacheBidirectional, $(T2 cacheBidirectional,
As above, but also provides $(D back) and $(D popBack).) As above, but also provides $(D back) and $(D popBack).)
$(T2 chunkyBy,
$(D chunkyBy!((a,b) => a[1] == b[1])([[1, 1], [1, 2], [2, 2], [2, 1]]))
returns a range containing 3 subranges: the first with just
$(D [1, 1]); the second with the elements $(D [1, 2]) and $(D [2, 2]);
and the third with just $(D [2, 1]).)
$(T2 each, $(T2 each,
$(D each!writeln([1, 2, 3])) eagerly prints the numbers $(D 1), $(D 2) $(D each!writeln([1, 2, 3])) eagerly prints the numbers $(D 1), $(D 2)
and $(D 3) on their own lines.) and $(D 3) on their own lines.)
@ -26,11 +28,6 @@ $(T2 filterBidirectional,
$(T2 group, $(T2 group,
$(D group([5, 2, 2, 3, 3])) returns a range containing the tuples $(D group([5, 2, 2, 3, 3])) returns a range containing the tuples
$(D tuple(5, 1)), $(D tuple(2, 2)), and $(D tuple(3, 2)).) $(D tuple(5, 1)), $(D tuple(2, 2)), and $(D tuple(3, 2)).)
$(T2 groupBy,
$(D groupBy!((a,b) => a[1] == b[1])([[1, 1], [1, 2], [2, 2], [2, 1]]))
returns a range containing 3 subranges: the first with just
$(D [1, 1]); the second with the elements $(D [1, 2]) and $(D [2, 2]);
and the third with just $(D [2, 1]).)
$(T2 joiner, $(T2 joiner,
$(D joiner(["hello", "world!"], "; ")) returns a range that iterates $(D joiner(["hello", "world!"], "; ")) returns a range that iterates
over the characters $(D "hello; world!"). No new string is created - over the characters $(D "hello; world!"). No new string is created -
@ -66,10 +63,9 @@ import std.functional; // : unaryFun, binaryFun;
import std.range.primitives; import std.range.primitives;
import std.traits; import std.traits;
///
template aggregate(fun...) if (fun.length >= 1) template aggregate(fun...) if (fun.length >= 1)
{ {
/** /* --Intentionally not ddoc--
* Aggregates elements in each subrange of the given range of ranges using * Aggregates elements in each subrange of the given range of ranges using
* the given aggregating function(s). * the given aggregating function(s).
* Params: * Params:
@ -90,7 +86,6 @@ template aggregate(fun...) if (fun.length >= 1)
return ror.map!(reduce!fun); return ror.map!(reduce!fun);
} }
///
unittest unittest
{ {
import std.algorithm.comparison : equal, max, min; import std.algorithm.comparison : equal, max, min;
@ -1374,8 +1369,8 @@ unittest
auto g6 = a6.group; auto g6 = a6.group;
} }
// Used by implementation of groupBy for non-forward input ranges. // Used by implementation of chunkBy for non-forward input ranges.
private struct GroupByChunkImpl(alias pred, Range) private struct ChunkByChunkImpl(alias pred, Range)
if (isInputRange!Range && !isForwardRange!Range) if (isInputRange!Range && !isForwardRange!Range)
{ {
alias fun = binaryFun!pred; alias fun = binaryFun!pred;
@ -1398,11 +1393,31 @@ private struct GroupByChunkImpl(alias pred, Range)
void popFront() { r.popFront(); } void popFront() { r.popFront(); }
} }
// Implementation of groupBy for non-forward input ranges. private template ChunkByImplIsUnary(alias pred, Range)
private struct GroupByImpl(alias pred, Range) {
static if (is(typeof(binaryFun!pred(ElementType!Range.init,
ElementType!Range.init)) : bool))
enum ChunkByImplIsUnary = false;
else static if (is(typeof(
unaryFun!pred(ElementType!Range.init) ==
unaryFun!pred(ElementType!Range.init))))
enum ChunkByImplIsUnary = true;
else
static assert(0, "chunkBy expects either a binary predicate or "~
"a unary predicate on range elements of type: "~
ElementType!Range.stringof);
}
// Implementation of chunkBy for non-forward input ranges.
private struct ChunkByImpl(alias pred, Range)
if (isInputRange!Range && !isForwardRange!Range) if (isInputRange!Range && !isForwardRange!Range)
{ {
alias fun = binaryFun!pred; enum bool isUnary = ChunkByImplIsUnary!(pred, Range);
static if (isUnary)
alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b));
else
alias eq = binaryFun!pred;
private Range r; private Range r;
private ElementType!Range _prev; private ElementType!Range _prev;
@ -1414,8 +1429,8 @@ private struct GroupByImpl(alias pred, Range)
{ {
// Check reflexivity if predicate is claimed to be an equivalence // Check reflexivity if predicate is claimed to be an equivalence
// relation. // relation.
assert(pred(r.front, r.front), assert(eq(r.front, r.front),
"predicate " ~ pred.stringof ~ " is not reflexive"); "predicate is not reflexive");
// _prev's type may be a nested struct, so must be initialized // _prev's type may be a nested struct, so must be initialized
// directly in the constructor (cannot call savePred()). // directly in the constructor (cannot call savePred()).
@ -1431,14 +1446,23 @@ private struct GroupByImpl(alias pred, Range)
@property auto front() @property auto front()
{ {
return GroupByChunkImpl!(pred, Range)(r, _prev); static if (isUnary)
{
import std.typecons : tuple;
return tuple(unaryFun!pred(_prev),
ChunkByChunkImpl!(eq, Range)(r, _prev));
}
else
{
return ChunkByChunkImpl!(eq, Range)(r, _prev);
}
} }
void popFront() void popFront()
{ {
while (!r.empty) while (!r.empty)
{ {
if (!fun(_prev, r.front)) if (!eq(_prev, r.front))
{ {
_prev = r.front; _prev = r.front;
break; break;
@ -1448,12 +1472,19 @@ private struct GroupByImpl(alias pred, Range)
} }
} }
// Single-pass implementation of groupBy for forward ranges. // Single-pass implementation of chunkBy for forward ranges.
private struct GroupByImpl(alias pred, Range) private struct ChunkByImpl(alias pred, Range)
if (isForwardRange!Range) if (isForwardRange!Range)
{ {
import std.typecons : RefCounted; import std.typecons : RefCounted;
enum bool isUnary = ChunkByImplIsUnary!(pred, Range);
static if (isUnary)
alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b));
else
alias eq = binaryFun!pred;
// Outer range // Outer range
static struct Impl static struct Impl
{ {
@ -1482,8 +1513,8 @@ private struct GroupByImpl(alias pred, Range)
mothership = origin; mothership = origin;
// Note: this requires reflexivity. // Note: this requires reflexivity.
assert(pred(start.front, current.front), assert(eq(start.front, current.front),
"predicate " ~ pred.stringof ~ " is not reflexive"); "predicate is not reflexive");
} }
@property bool empty() { return groupNum == size_t.max; } @property bool empty() { return groupNum == size_t.max; }
@ -1494,7 +1525,7 @@ private struct GroupByImpl(alias pred, Range)
current.popFront(); current.popFront();
// Note: this requires transitivity. // Note: this requires transitivity.
if (current.empty || !pred(start.front, current.front)) if (current.empty || !eq(start.front, current.front))
{ {
if (groupNum == mothership.groupNum) if (groupNum == mothership.groupNum)
{ {
@ -1524,14 +1555,26 @@ private struct GroupByImpl(alias pred, Range)
} }
@property bool empty() { return impl.current.empty; } @property bool empty() { return impl.current.empty; }
@property auto front() { return Group(impl); }
@property auto front()
{
static if (isUnary)
{
import std.typecons : tuple;
return tuple(unaryFun!pred(impl.current.front), Group(impl));
}
else
{
return Group(impl);
}
}
void popFront() void popFront()
{ {
// Scan for next group. If we're lucky, one of our Groups would have // Scan for next group. If we're lucky, one of our Groups would have
// already set .next to the start of the next group, in which case the // already set .next to the start of the next group, in which case the
// loop is skipped. // loop is skipped.
while (!impl.next.empty && pred(impl.current.front, impl.next.front)) while (!impl.next.empty && eq(impl.current.front, impl.next.front))
{ {
impl.next.popFront(); impl.next.popFront();
} }
@ -1574,7 +1617,7 @@ unittest
static assert(isForwardRange!RefFwdRange); static assert(isForwardRange!RefFwdRange);
auto testdata = new RefFwdRange([1, 3, 5, 2, 4, 7, 6, 8, 9]); auto testdata = new RefFwdRange([1, 3, 5, 2, 4, 7, 6, 8, 9]);
auto groups = testdata.groupBy!((a,b) => (a % 2) == (b % 2)); auto groups = testdata.chunkBy!((a,b) => (a % 2) == (b % 2));
auto outerSave1 = groups.save; auto outerSave1 = groups.save;
// Sanity test // Sanity test
@ -1617,14 +1660,17 @@ unittest
* reflexive ($(D pred(x,x)) is always true), symmetric * reflexive ($(D pred(x,x)) is always true), symmetric
* ($(D pred(x,y) == pred(y,x))), and transitive ($(D pred(x,y) && pred(y,z)) * ($(D pred(x,y) == pred(y,x))), and transitive ($(D pred(x,y) && pred(y,z))
* implies $(D pred(x,z))). If this is not the case, the range returned by * implies $(D pred(x,z))). If this is not the case, the range returned by
* groupBy may assert at runtime or behave erratically. * chunkBy may assert at runtime or behave erratically.
* *
* Params: * Params:
* pred = Predicate for determining equivalence. * pred = Predicate for determining equivalence.
* r = The range to be chunked. * r = The range to be chunked.
* *
* Returns: A range of ranges in which all elements in a given subrange are * Returns: With a binary predicate, a range of ranges is returned in which
* equivalent under the given predicate. * all elements in a given subrange are equivalent under the given predicate.
* With a unary predicate, a range of tuples is returned, with the tuple
* consisting of the result of the unary predicate for each subrange, and the
* subrange itself.
* *
* Notes: * Notes:
* *
@ -1637,20 +1683,10 @@ unittest
* $(XREF algorithm,group), which collapses adjacent equivalent elements into a * $(XREF algorithm,group), which collapses adjacent equivalent elements into a
* single element. * single element.
*/ */
auto groupBy(alias pred, Range)(Range r) auto chunkBy(alias pred, Range)(Range r)
if (isInputRange!Range) if (isInputRange!Range)
{ {
static if (is(typeof(binaryFun!pred(ElementType!Range.init, return ChunkByImpl!(pred, Range)(r);
ElementType!Range.init)) : bool))
return GroupByImpl!(pred, Range)(r);
else static if (is(typeof(
unaryFun!pred(ElementType!Range.init) ==
unaryFun!pred(ElementType!Range.init))))
return GroupByImpl!((a,b) => pred(a) == pred(b), Range)(r);
else
static assert(0, "groupBy expects either a binary predicate or "~
"a unary predicate on range elements of type: "~
ElementType!Range.stringof);
} }
/// Showing usage with binary predicate: /// Showing usage with binary predicate:
@ -1666,13 +1702,13 @@ auto groupBy(alias pred, Range)(Range r)
[2, 3] [2, 3]
]; ];
auto r1 = data.groupBy!((a,b) => a[0] == b[0]); auto r1 = data.chunkBy!((a,b) => a[0] == b[0]);
assert(r1.equal!equal([ assert(r1.equal!equal([
[[1, 1], [1, 2]], [[1, 1], [1, 2]],
[[2, 2], [2, 3]] [[2, 2], [2, 3]]
])); ]));
auto r2 = data.groupBy!((a,b) => a[1] == b[1]); auto r2 = data.chunkBy!((a,b) => a[1] == b[1]);
assert(r2.equal!equal([ assert(r2.equal!equal([
[[1, 1]], [[1, 1]],
[[1, 2], [2, 2]], [[1, 2], [2, 2]],
@ -1694,7 +1730,7 @@ unittest
{ {
// Grouping by maximum adjacent difference: // Grouping by maximum adjacent difference:
import std.math : abs; import std.math : abs;
auto r3 = [1, 3, 2, 5, 4, 9, 10].groupBy!((a, b) => abs(a-b) < 3); auto r3 = [1, 3, 2, 5, 4, 9, 10].chunkBy!((a, b) => abs(a-b) < 3);
assert(r3.equal!equal([ assert(r3.equal!equal([
[1, 3, 2], [1, 3, 2],
[5, 4], [5, 4],
@ -1707,6 +1743,7 @@ unittest
/* FIXME: pure @safe nothrow*/ unittest /* FIXME: pure @safe nothrow*/ unittest
{ {
import std.algorithm.comparison : equal; import std.algorithm.comparison : equal;
import std.typecons : tuple;
// Grouping by particular attribute of each element: // Grouping by particular attribute of each element:
auto range = auto range =
@ -1720,31 +1757,33 @@ unittest
[3, 3] [3, 3]
]; ];
auto byX = groupBy!(a => a[0])(range); auto byX = chunkBy!(a => a[0])(range);
auto expected1 = auto expected1 =
[ [
[[1, 1], [1, 1], [1, 2]], tuple(1, [[1, 1], [1, 1], [1, 2]]),
[[2, 2], [2, 3], [2, 3]], tuple(2, [[2, 2], [2, 3], [2, 3]]),
[[3, 3]] tuple(3, [[3, 3]])
]; ];
foreach (e; byX) foreach (e; byX)
{ {
assert(!expected1.empty); assert(!expected1.empty);
assert(e.equal(expected1.front)); assert(e[0] == expected1.front[0]);
assert(e[1].equal(expected1.front[1]));
expected1.popFront(); expected1.popFront();
} }
auto byY = groupBy!(a => a[1])(range); auto byY = chunkBy!(a => a[1])(range);
auto expected2 = auto expected2 =
[ [
[[1, 1], [1, 1]], tuple(1, [[1, 1], [1, 1]]),
[[1, 2], [2, 2]], tuple(2, [[1, 2], [2, 2]]),
[[2, 3], [2, 3], [3, 3]] tuple(3, [[2, 3], [2, 3], [3, 3]])
]; ];
foreach (e; byY) foreach (e; byY)
{ {
assert(!expected2.empty); assert(!expected2.empty);
assert(e.equal(expected2.front)); assert(e[0] == expected2.front[0]);
assert(e[1].equal(expected2.front[1]));
expected2.popFront(); expected2.popFront();
} }
} }
@ -1752,6 +1791,7 @@ unittest
/*FIXME: pure @safe nothrow*/ unittest /*FIXME: pure @safe nothrow*/ unittest
{ {
import std.algorithm.comparison : equal; import std.algorithm.comparison : equal;
import std.typecons : tuple;
struct Item { int x, y; } struct Item { int x, y; }
@ -1772,11 +1812,11 @@ unittest
auto arr = [ Item(1,2), Item(1,3), Item(2,3) ]; auto arr = [ Item(1,2), Item(1,3), Item(2,3) ];
static assert(isForwardRange!(typeof(arr))); static assert(isForwardRange!(typeof(arr)));
auto byX = groupBy!(a => a.x)(arr); auto byX = chunkBy!(a => a.x)(arr);
static assert(isForwardRange!(typeof(byX))); static assert(isForwardRange!(typeof(byX)));
auto byX_subrange1 = byX.front.save; auto byX_subrange1 = byX.front[1].save;
auto byX_subrange2 = byX.front.save; auto byX_subrange2 = byX.front[1].save;
static assert(isForwardRange!(typeof(byX_subrange1))); static assert(isForwardRange!(typeof(byX_subrange1)));
static assert(isForwardRange!(typeof(byX_subrange2))); static assert(isForwardRange!(typeof(byX_subrange2)));
@ -1786,32 +1826,38 @@ unittest
assert(byX_subrange1.equal([ Item(1,3) ])); assert(byX_subrange1.equal([ Item(1,3) ]));
assert(byX_subrange2.equal([ Item(1,2), Item(1,3) ])); assert(byX_subrange2.equal([ Item(1,2), Item(1,3) ]));
auto byY = groupBy!(a => a.y)(arr); auto byY = chunkBy!(a => a.y)(arr);
static assert(isForwardRange!(typeof(byY))); static assert(isForwardRange!(typeof(byY)));
auto byY2 = byY.save; auto byY2 = byY.save;
static assert(is(typeof(byY) == typeof(byY2))); static assert(is(typeof(byY) == typeof(byY2)));
byY.popFront(); byY.popFront();
assert(byY.front.equal([ Item(1,3), Item(2,3) ])); assert(byY.front[0] == 3);
assert(byY2.front.equal([ Item(1,2) ])); assert(byY.front[1].equal([ Item(1,3), Item(2,3) ]));
assert(byY2.front[0] == 2);
assert(byY2.front[1].equal([ Item(1,2) ]));
} }
// Test non-forward input ranges. // Test non-forward input ranges.
{ {
auto range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]); auto range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]);
auto byX = groupBy!(a => a.x)(range); auto byX = chunkBy!(a => a.x)(range);
assert(byX.front.equal([ Item(1,1), Item(1,2) ])); assert(byX.front[0] == 1);
assert(byX.front[1].equal([ Item(1,1), Item(1,2) ]));
byX.popFront(); byX.popFront();
assert(byX.front.equal([ Item(2,2) ])); assert(byX.front[0] == 2);
assert(byX.front[1].equal([ Item(2,2) ]));
byX.popFront(); byX.popFront();
assert(byX.empty); assert(byX.empty);
assert(range.empty); assert(range.empty);
range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]); range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]);
auto byY = groupBy!(a => a.y)(range); auto byY = chunkBy!(a => a.y)(range);
assert(byY.front.equal([ Item(1,1) ])); assert(byY.front[0] == 1);
assert(byY.front[1].equal([ Item(1,1) ]));
byY.popFront(); byY.popFront();
assert(byY.front.equal([ Item(1,2), Item(2,2) ])); assert(byY.front[0] == 2);
assert(byY.front[1].equal([ Item(1,2), Item(2,2) ]));
byY.popFront(); byY.popFront();
assert(byY.empty); assert(byY.empty);
assert(range.empty); assert(range.empty);
@ -1823,7 +1869,7 @@ version(none) // This requires support for non-equivalence relations
unittest unittest
{ {
import std.algorithm.comparison : equal; import std.algorithm.comparison : equal;
auto r = [1, 2, 3, 4, 5, 6, 7, 8, 9].groupBy!((x, y) => ((x*y) % 3) == 0); auto r = [1, 2, 3, 4, 5, 6, 7, 8, 9].chunkBy!((x, y) => ((x*y) % 3) == 0);
assert(r.equal!equal([ assert(r.equal!equal([
[1], [1],
[2, 3, 4], [2, 3, 4],
@ -1835,7 +1881,7 @@ unittest
// Issue 13805 // Issue 13805
unittest unittest
{ {
[""].map!((s) => s).groupBy!((x, y) => true); [""].map!((s) => s).chunkBy!((x, y) => true);
} }
// joiner // joiner

View file

@ -59,14 +59,13 @@ $(TR $(TDNW Comparison)
$(TR $(TDNW Iteration) $(TR $(TDNW Iteration)
$(TDNW $(SUBMODULE iteration)) $(TDNW $(SUBMODULE iteration))
$(TD $(TD
$(SUBREF iteration, aggregate)
$(SUBREF iteration, cache) $(SUBREF iteration, cache)
$(SUBREF iteration, cacheBidirectional) $(SUBREF iteration, cacheBidirectional)
$(SUBREF iteration, chunkBy)
$(SUBREF iteration, each) $(SUBREF iteration, each)
$(SUBREF iteration, filter) $(SUBREF iteration, filter)
$(SUBREF iteration, filterBidirectional) $(SUBREF iteration, filterBidirectional)
$(SUBREF iteration, group) $(SUBREF iteration, group)
$(SUBREF iteration, groupBy)
$(SUBREF iteration, joiner) $(SUBREF iteration, joiner)
$(SUBREF iteration, map) $(SUBREF iteration, map)
$(SUBREF iteration, reduce) $(SUBREF iteration, reduce)

View file

@ -7142,6 +7142,17 @@ sgi.com/tech/stl/binary_search.html, binary_search).
} }
return false; return false;
} }
// groupBy
/**
Returns a range of subranges of elements that are equivalent according to the
sorting relation.
*/
auto groupBy()()
{
import std.algorithm.iteration : chunkBy;
return _input.chunkBy!((a, b) => !predFun(a, b) && !predFun(b, a));
}
} }
/// ///
@ -7271,6 +7282,14 @@ that break its sortedness, $(D SortedRange) will work erratically.
auto s = assumeSorted(arr); auto s = assumeSorted(arr);
} }
unittest
{
import std.algorithm.comparison : equal;
int[] arr = [100, 101, 102, 200, 201, 300];
auto s = assumeSorted!((a, b) => a / 100 < b / 100)(arr);
assert(s.groupBy.equal!equal([[100, 101, 102], [200, 201], [300]]));
}
// Test on an input range // Test on an input range
unittest unittest
{ {