Fixes splitter!pred and splitter(string)

This commit is contained in:
monarchdodra 2013-08-22 16:10:41 +02:00
parent ec25c68f47
commit 60a54e7af5
2 changed files with 186 additions and 72 deletions

View file

@ -2717,32 +2717,45 @@ unittest
assert(words.equal([ "i", "am", "pointing" ])); assert(words.equal([ "i", "am", "pointing" ]));
} }
///ditto
auto splitter(alias isTerminator, Range)(Range input) auto splitter(alias isTerminator, Range)(Range input)
if (is(typeof(unaryFun!(isTerminator)(ElementType!(Range).init)))) if (isForwardRange!Range && is (typeof(unaryFun!isTerminator(input.front))))
{ {
return SplitterResult!(unaryFun!isTerminator, Range)(input); return SplitterResult!(unaryFun!isTerminator, Range)(input);
} }
private struct SplitterResult(alias isTerminator, Range) private struct SplitterResult(alias isTerminator, Range)
{ {
enum fullSlicing = (hasLength!Range && hasSlicing!Range) || isSomeString!Range;
private Range _input; private Range _input;
private size_t _end; private size_t _end = 0;
static if(!fullSlicing)
private Range _next;
private void findTerminator()
{
static if (fullSlicing)
{
auto r = find!isTerminator(_input.save);
_end = _input.length - r.length;
}
else
for ( _end = 0; !_next.empty ; _next.popFront)
{
if (isTerminator(_next.front)) break;
++_end;
}
}
this(Range input) this(Range input)
{ {
_input = input; _input = input;
if (_input.empty) static if(!fullSlicing)
{ _next = _input.save;
_end = _end.max;
} if (!_input.empty)
else findTerminator();
{
// Chase first terminator
while (_end < _input.length && !isTerminator(_input[_end]))
{
++_end;
}
}
} }
static if (isInfinite!Range) static if (isInfinite!Range)
@ -2757,57 +2770,54 @@ private struct SplitterResult(alias isTerminator, Range)
} }
} }
@property Range front() @property auto front()
{ {
assert(!empty); version(assert) if (empty) throw new RangeError();
static if (fullSlicing)
return _input[0 .. _end]; return _input[0 .. _end];
else
//Note: We don't need to save input, as we never actually modify it.
//Input is already saved as "checkpoints" of _next.
return _input.takeExactly(_end);
} }
void popFront() void popFront()
{ {
assert(!empty); version(assert) if (empty) throw new RangeError();
if (_input.empty)
static if (fullSlicing)
{ {
_end = _end.max;
return;
}
// Skip over existing word
_input = _input[_end .. _input.length]; _input = _input[_end .. _input.length];
// Skip terminator
for (;;)
{
if (_input.empty) if (_input.empty)
{ {
// Nothing following the terminator - done
_end = _end.max; _end = _end.max;
return; return;
} }
if (!isTerminator(_input.front))
{
// Found a legit next field
break;
}
_input.popFront(); _input.popFront();
} }
assert(!_input.empty && !isTerminator(_input.front)); else
// Prepare _end
_end = 1;
while (_end < _input.length && !isTerminator(_input[_end]))
{ {
++_end; if (_next.empty)
{
_input = _next;
_end = _end.max;
return;
} }
_next.popFront();
_input = _next.save;
}
findTerminator();
} }
static if (isForwardRange!Range)
{
@property typeof(this) save() @property typeof(this) save()
{ {
auto ret = this; auto ret = this;
ret._input = _input.save; ret._input = _input.save;
static if (!fullSlicing)
ret._next = _next.save;
return ret; return ret;
} }
} }
}
unittest unittest
{ {
@ -2826,22 +2836,18 @@ unittest
writeln("unittest @", __FILE__, ":", __LINE__, " done."); writeln("unittest @", __FILE__, ":", __LINE__, " done.");
void compare(string sentence, string[] witness) void compare(string sentence, string[] witness)
{ {
foreach (word; splitter!"a == ' '"(sentence)) auto r = splitter!"a == ' '"(sentence);
{ assert(equal(r.save, witness), format("got: %(%s, %) expected: %(%s, %)", r, witness));
assert(word == witness.front, word);
witness.popFront();
}
assert(witness.empty, witness[0]);
} }
compare(" Mary has a little lamb. ", compare(" Mary has a little lamb. ",
["", "Mary", "has", "a", "little", "lamb."]); ["", "Mary", "", "has", "a", "little", "lamb.", "", "", ""]);
compare("Mary has a little lamb. ", compare("Mary has a little lamb. ",
["Mary", "has", "a", "little", "lamb."]); ["Mary", "", "has", "a", "little", "lamb.", "", "", ""]);
compare("Mary has a little lamb.", compare("Mary has a little lamb.",
["Mary", "has", "a", "little", "lamb."]); ["Mary", "", "has", "a", "little", "lamb."]);
compare("", []);
compare("", [""]); compare("", [""]);
compare(" ", ["", ""]);
static assert(isForwardRange!(typeof(splitter!"a == ' '"("ABC")))); static assert(isForwardRange!(typeof(splitter!"a == ' '"("ABC"))));
@ -2857,10 +2863,45 @@ unittest
} }
} }
unittest
{
struct Entry
{
int low;
int high;
int[][] result;
}
Entry[] entries = [
Entry(0, 0, [[]]),
Entry(0, 1, [[0]]),
Entry(1, 2, [[], []]),
Entry(2, 7, [[2], [4], [6]]),
Entry(1, 8, [[], [2], [4], [6], []]),
];
foreach ( entry ; entries )
{
auto a = iota(entry.low, entry.high).filter!"true"();
auto b = splitter!"a%2"(a);
assert(equal!equal(b.save, entry.result), format("got: %(%s, %) expected: %(%s, %)", b, entry.result));
}
}
unittest
{
//@@@6791@@@
assert(equal(std.array.splitter("là dove terminava quella valle"), ["là", "dove", "terminava", "quella", "valle"]));
assert(equal(splitter!(std.uni.isWhite)("là dove terminava quella valle"), ["là", "dove", "terminava", "quella", "valle"]));
assert(equal(splitter!"a=='本'"("日本語"), ["日", "語"]));
}
//@@@6730@@@ This exists already in std.array, so this declaration, at best, will only create ambiguity.
//unfortunately, an alias will conflict with the existing splitter in std.algorithm.
//It needs to be removed.
deprecated("std.algorithm.splitter(string) is deprecated in favor of std.array.splitter(string)")
auto splitter(Range)(Range input) auto splitter(Range)(Range input)
if (isSomeString!Range) if (isSomeString!Range)
{ {
return splitter!(std.uni.isWhite)(input); return std.array.splitter(input);
} }
unittest unittest
@ -2872,7 +2913,7 @@ unittest
lines[1] = "line \ttwo".dup; lines[1] = "line \ttwo".dup;
lines[2] = "yah last line\ryah".dup; lines[2] = "yah last line\ryah".dup;
foreach (line; lines) { foreach (line; lines) {
foreach (word; splitter(std.string.strip(line))) { foreach (word; std.array.splitter(std.string.strip(line))) {
if (word in dictionary) continue; // Nothing to do if (word in dictionary) continue; // Nothing to do
auto newID = dictionary.length; auto newID = dictionary.length;
dictionary[to!string(word)] = cast(uint)newID; dictionary[to!string(word)] = cast(uint)newID;

View file

@ -1385,13 +1385,69 @@ unittest //safety, purity, ctfe ...
assertCTFEable!dg; assertCTFEable!dg;
} }
/** /++
Splits a string by whitespace. Lazily splits the string $(D s) into words, using whitespace as
*/ delimiter.
auto splitter(C)(C[] s) @safe pure
if(isSomeString!(C[])) This function is string specific and, contrary to $(D
splitter!(std.uni.isWhite)), runs of white spaces will be merged together.
+/
auto splitter(C)(C[] s)
if(isSomeChar!C)
{ {
return std.algorithm.splitter!(std.uni.isWhite)(s); static struct Result
{
private:
alias S = C[];
S _s;
size_t _frontLength;
size_t _backLength;
void getFirst()
{
auto r = find!(std.uni.isWhite)(_s);
_frontLength = _s.length - r.length;
}
public:
this(C[] s)
{
_s = s.strip();
getFirst();
}
@property C[] front()
{
version(assert) if (empty) throw new RangeError();
return _s[0 .. _frontLength];
}
void popFront()
{
version(assert) if (empty) throw new RangeError();
_s = _s[_frontLength .. $].stripLeft();
getFirst();
}
@property empty()
{
return _s.empty();
}
@property Result save()
{
return this;
}
}
return Result(s);
}
///
unittest
{
auto a = " a bcd ef gh ";
assert(equal(splitter(a), ["a", "bcd", "ef", "gh"][]));
} }
/// ///
@ -1406,16 +1462,21 @@ auto splitter(C)(C[] s) @safe pure
foreach(S; TypeTuple!(string, wstring, dstring)) foreach(S; TypeTuple!(string, wstring, dstring))
{ {
S a = " a bcd ef gh "; S a = " a bcd ef gh ";
assert(equal(splitter(a), [to!S(""), to!S("a"), to!S("bcd"), to!S("ef"), to!S("gh")][])); assert(equal(splitter(a), [to!S("a"), to!S("bcd"), to!S("ef"), to!S("gh")]));
a = ""; a = "";
assert(splitter(a).empty); assert(splitter(a).empty);
} }
immutable string s = " a bcd ef gh ";
assert(equal(splitter(s), ["a", "bcd", "ef", "gh"][]));
} }
/************************************** /++
* Splits $(D s) into an array, using $(D delim) as the delimiter. Eagerly splits $(D s) into an array, using $(D delim) as the delimiter.
*/
Unqual!(S1)[] split(S1, S2)(S1 s, S2 delim) See also: $(XREF algorithm, splitter) for the lazy version of this operator.
+/
Unqual!S1[] split(S1, S2)(S1 s, S2 delim)
if (isForwardRange!(Unqual!S1) && isForwardRange!S2) if (isForwardRange!(Unqual!S1) && isForwardRange!S2)
{ {
Unqual!S1 us = s; Unqual!S1 us = s;
@ -1426,6 +1487,18 @@ if (isForwardRange!(Unqual!S1) && isForwardRange!S2)
} }
return app.data; return app.data;
} }
///ditto
Unqual!S1[] split(alias isTerminator, S1)(S1 s)
if (isForwardRange!(Unqual!S1))
{
Unqual!S1 us = s;
auto app = appender!(Unqual!(S1)[])();
foreach (word; std.algorithm.splitter!isTerminator(us))
{
app.put(word);
}
return app.data;
}
unittest unittest
{ {