Fixes splitter!pred and splitter(string)

2025-05-07 03:27:03 +03:00 · 2013-08-22 16:10:41 +02:00 · 2013-08-22 16:10:41 +02:00 · 60a54e7af5
commit 60a54e7af5
parent ec25c68f47
2 changed files with 186 additions and 72 deletions
--- a/std/algorithm.d
+++ b/std/algorithm.d
@ -2717,32 +2717,45 @@ unittest
    assert(words.equal([ "i", "am", "pointing" ]));
 }
 ///ditto
 auto splitter(alias isTerminator, Range)(Range input)
-if (is(typeof(unaryFun!(isTerminator)(ElementType!(Range).init))))
+if (isForwardRange!Range && is (typeof(unaryFun!isTerminator(input.front))))
 {
    // Wrap input in the SplitterResult range. The predicate goes through
    // unaryFun first, so it may be given as a string such as "a == ' '".
    return SplitterResult!(unaryFun!isTerminator, Range)(input);
 }
 // Range type returned by splitter!isTerminator. Each front is the run of
 // elements of the input that precedes the next element satisfying isTerminator.
 private struct SplitterResult(alias isTerminator, Range)
 {
    // True when a piece can be produced by slicing _input directly.
    enum fullSlicing = (hasLength!Range && hasSlicing!Range) || isSomeString!Range;
    // Input that has not been consumed yet; the current piece starts at its front.
    private Range _input;
    // Length of the current piece. size_t.max (_end.max) is stored where the
    // code below marks the range as finished.
-    private size_t _end;
+    private size_t _end = 0;
    // Scanning cursor, declared only when _input cannot be sliced.
    static if(!fullSlicing)
        private Range _next;
    // Sets _end to the number of elements that precede the next terminator.
    private void findTerminator()
    {
        static if (fullSlicing)
        {
            // r starts at the first terminator, or is empty when there is none,
            // so the difference in lengths is the size of the leading piece.
            auto r = find!isTerminator(_input.save);
            _end = _input.length - r.length;
        }
        else
            // Count elements while advancing _next; _next is left on the
            // terminator, or empty when no terminator was found.
            for ( _end = 0; !_next.empty ; _next.popFront)
            {
                if (isTerminator(_next.front)) break;
                ++_end;
            }
    }
    // NOTE(review): the constructor body below interleaves removed (-) and
    // added (+) diff lines; the (+) lines set up _next and call findTerminator().
    this(Range input)
    {
        _input = input;
-        if (_input.empty)
+        static if(!fullSlicing)
-        {
+            _next = _input.save;
-            _end = _end.max;
+
-        }
+        if (!_input.empty)
-        else
+            findTerminator();
        {
            // Chase first terminator
            while (_end < _input.length && !isTerminator(_input[_end]))
            {
                ++_end;
            }
        }
    }
    static if (isInfinite!Range)
@ -2757,57 +2770,54 @@ private struct SplitterResult(alias isTerminator, Range)
        }
    }
    // Current piece: the first _end elements of _input.
-    @property Range front()
+    @property auto front()
    {
-        assert(!empty);
+        version(assert) if (empty) throw new RangeError();
        static if (fullSlicing)
            return _input[0 .. _end];
        else
            //Note: We don't need to save input, as we never actually modify it.
            //Input is already saved as "checkpoints" of _next.
            return _input.takeExactly(_end);
    }
    // NOTE(review): the body below interleaves removed (-) and added (+) diff
    // lines together with context from both versions, so it cannot be read top
    // to bottom as a single function body.
    void popFront()
    {
-        assert(!empty);
+        version(assert) if (empty) throw new RangeError();
-        if (_input.empty)
+
        static if (fullSlicing)
        {
            _end = _end.max;
            return;
        }
        // Skip over existing word
            _input = _input[_end .. _input.length];
        // Skip terminator
        for (;;)
        {
            if (_input.empty)
            {
                // Nothing following the terminator - done
                _end = _end.max;
                return;
            }
            if (!isTerminator(_input.front))
            {
                // Found a legit next field
                break;
            }
            _input.popFront();
        }
-        assert(!_input.empty && !isTerminator(_input.front));
+        else
        // Prepare _end
        _end = 1;
        while (_end < _input.length && !isTerminator(_input[_end]))
        {
-            ++_end;
+            if (_next.empty)
            {
                _input = _next;
                _end = _end.max;
                return;
            }
            _next.popFront();
            _input = _next.save;
        }
        findTerminator();
    }
    static if (isForwardRange!Range)
    {
    // Copies this struct, then replaces each range member of the copy with a
    // save of the corresponding member.
    @property typeof(this) save()
    {
        auto ret = this;
        ret._input = _input.save;
        static if (!fullSlicing)
            ret._next = _next.save;
        return ret;
    }
 }
 }
 unittest
 {
@ -2826,22 +2836,18 @@ unittest
        writeln("unittest @", __FILE__, ":", __LINE__, " done.");
    void compare(string sentence, string[] witness)
    {
-        foreach (word; splitter!"a == ' '"(sentence))
+        auto r = splitter!"a == ' '"(sentence);
-        {
+        assert(equal(r.save, witness), format("got: %(%s, %) expected: %(%s, %)", r, witness));
            assert(word == witness.front, word);
            witness.popFront();
        }
        assert(witness.empty, witness[0]);
    }
    compare(" Mary  has a little lamb.   ",
-            ["", "Mary", "has", "a", "little", "lamb."]);
+            ["", "Mary", "", "has", "a", "little", "lamb.", "", "", ""]);
    compare("Mary  has a little lamb.   ",
-            ["Mary", "has", "a", "little", "lamb."]);
+            ["Mary", "", "has", "a", "little", "lamb.", "", "", ""]);
    compare("Mary  has a little lamb.",
-            ["Mary", "has", "a", "little", "lamb."]);
+            ["Mary", "", "has", "a", "little", "lamb."]);
    compare("", []);
    compare("", [""]);
    compare(" ", ["", ""]);
    static assert(isForwardRange!(typeof(splitter!"a == ' '"("ABC"))));
@ -2857,10 +2863,45 @@ unittest
    }
 }
 unittest
 {
    // Table-driven check of splitter!"a%2" (odd numbers are the terminators)
    // over iota(low, high) wrapped in filter!"true".
    // NOTE(review): the filter wrapper presumably exists to exercise the
    // non-slicing code path - confirm.
    struct Case
    {
        int lo;
        int hi;
        int[][] expected;
    }
    Case[] cases = [
        Case(0, 0, [[]]),
        Case(0, 1, [[0]]),
        Case(1, 2, [[], []]),
        Case(2, 7, [[2], [4], [6]]),
        Case(1, 8, [[], [2], [4], [6], []]),
    ];
    foreach (idx; 0 .. cases.length)
    {
        auto current = cases[idx];
        auto source = filter!"true"(iota(current.lo, current.hi));
        auto pieces = splitter!"a%2"(source);
        // Compare a saved copy so that pieces is still intact for the message.
        assert(equal!equal(pieces.save, current.expected),
               format("got: %(%s, %) expected: %(%s, %)", pieces, current.expected));
    }
 }
 unittest
 {
    //@@@6791@@@
    // Runs the whitespace splitters and a predicate splitter on non-ASCII text.
    string sentence = "là dove terminava quella valle";
    string[] words = ["là", "dove", "terminava", "quella", "valle"];
    assert(equal(std.array.splitter(sentence), words));
    assert(equal(splitter!(std.uni.isWhite)(sentence), words));
    string[] kanji = ["日", "語"];
    assert(equal(splitter!"a=='本'"("日本語"), kanji));
 }
 //@@@6730@@@ This exists already in std.array, so this declaration, at best, will only create ambiguity.
 //Unfortunately, an alias will conflict with the existing splitter in std.algorithm.
 //It needs to be removed.
 //Until then it simply forwards to std.array.splitter, which is the replacement
 //named in the deprecation message.
 deprecated("std.algorithm.splitter(string) is deprecated in favor of std.array.splitter(string)")
 auto splitter(Range)(Range input)
 if (isSomeString!Range)
 {
-    return splitter!(std.uni.isWhite)(input);
+    return std.array.splitter(input);
 }
 unittest
@ -2872,7 +2913,7 @@ unittest
    lines[1] = "line \ttwo".dup;
    lines[2] = "yah            last   line\ryah".dup;
    foreach (line; lines) {
-       foreach (word; splitter(std.string.strip(line))) {
+       foreach (word; std.array.splitter(std.string.strip(line))) {
            if (word in dictionary) continue; // Nothing to do
            auto newID = dictionary.length;
            dictionary[to!string(word)] = cast(uint)newID;
--- a/std/array.d
+++ b/std/array.d
@ -1385,13 +1385,69 @@ unittest //safety, purity, ctfe ...
    assertCTFEable!dg;
 }
-/**
+/++
-Splits a string by whitespace.
+Lazily splits the string $(D s) into words, using whitespace as
- */
+delimiter.
-auto splitter(C)(C[] s) @safe pure
+
-    if(isSomeString!(C[]))
+This function is string specific and, contrary to $(D
 splitter!(std.uni.isWhite)), it merges runs of whitespace together.
 +/
 auto splitter(C)(C[] s)
    if(isSomeChar!C)
 {
-    return std.algorithm.splitter!(std.uni.isWhite)(s);
    // Forward range over the whitespace-separated words of a string.
+    static struct Result
    {
    private:
        alias S = C[];
        // Remaining text; it always starts at the current word because it is
        // stripped in the constructor and left-stripped in popFront.
        S _s;
        // Length of the current word, measured as a slice length of _s.
        size_t _frontLength;
        // NOTE(review): _backLength is never read or written in this struct.
        size_t _backLength;
        // Measures the current word: everything up to the first whitespace
        // character, or all of _s when no whitespace is left.
        void getFirst()
        {
            auto r = find!(std.uni.isWhite)(_s);
            _frontLength = _s.length - r.length;
        }
    public:
        // Strips whitespace from both ends so no empty word is produced at
        // either end, then measures the first word.
        this(C[] s)
        {
            _s = s.strip();
            getFirst();
        }
        // The current word, as a slice of the input.
        @property C[] front()
        {
            version(assert) if (empty) throw new RangeError();
            return _s[0 .. _frontLength];
        }
        // Drops the current word and the whitespace run after it, then
        // measures the next word.
        void popFront()
        {
            version(assert) if (empty) throw new RangeError();
            _s = _s[_frontLength .. $].stripLeft();
            getFirst();
        }
        // True once no text remains.
        @property empty()
        {
            return _s.empty();
        }
        // A plain copy is enough: the state is a slice plus two lengths.
        @property Result save()
        {
            return this;
        }
    }
    return Result(s);
 }
 ///
 unittest
 {
    // Leading, trailing and repeated blanks do not produce empty words.
    string text = " a     bcd   ef gh ";
    string[] expected = ["a", "bcd", "ef", "gh"];
    assert(equal(splitter(text), expected));
 }
 ///
@ -1406,16 +1462,21 @@ auto splitter(C)(C[] s) @safe pure
    foreach(S; TypeTuple!(string, wstring, dstring))
    {
        S a = " a     bcd   ef gh ";
-        assert(equal(splitter(a), [to!S(""), to!S("a"), to!S("bcd"), to!S("ef"), to!S("gh")][]));
+        assert(equal(splitter(a), [to!S("a"), to!S("bcd"), to!S("ef"), to!S("gh")]));
        a = "";
        assert(splitter(a).empty);
    }
    immutable string s = " a     bcd   ef gh ";
    assert(equal(splitter(s), ["a", "bcd", "ef", "gh"][]));
 }
-/**************************************
+/++
- * Splits $(D s) into an array, using $(D delim) as the delimiter.
+Eagerly splits $(D s) into an array, using $(D delim) as the delimiter.
- */
+
-Unqual!(S1)[] split(S1, S2)(S1 s, S2 delim)
+See also: $(XREF algorithm, splitter) for the lazy version of this operator.
 +/
 Unqual!S1[] split(S1, S2)(S1 s, S2 delim)
 if (isForwardRange!(Unqual!S1) && isForwardRange!S2)
 {
    Unqual!S1 us = s;
@ -1426,6 +1487,18 @@ if (isForwardRange!(Unqual!S1) && isForwardRange!S2)
    }
    return app.data;
 }
 ///ditto
 Unqual!S1[] split(alias isTerminator, S1)(S1 s)
 if (isForwardRange!(Unqual!S1))
 {
    // Eager counterpart of std.algorithm.splitter!isTerminator: every piece the
    // lazy splitter yields is appended to an array, which is then returned.
    Unqual!S1 source = s;
    auto pieces = appender!(Unqual!(S1)[])();
    for (auto lazyParts = std.algorithm.splitter!isTerminator(source);
         !lazyParts.empty;
         lazyParts.popFront())
    {
        pieces.put(lazyParts.front);
    }
    return pieces.data;
 }
 unittest
 {