Merge pull request #1502 from monarchdodra/splitterPred

Fix splitter!pred and splitter(string)
2025-05-03 08:30:33 +03:00 · 2013-11-17 18:10:03 -08:00 · 2013-11-17 18:10:03 -08:00 · d408470f8f
commit d408470f8f
parent b2c6179854 4583a87031
2 changed files with 269 additions and 86 deletions
--- a/std/algorithm.d
+++ b/std/algorithm.d
@ -2310,6 +2310,8 @@ if (is(typeof(ElementType!Range.init == Separator.init))
            {
                _separatorLength = codeLength!(ElementEncodingType!Range)(separator);
            }
+            if (_input.empty)
+                _frontLength = _atEnd;
        }

        static if (isInfinite!Range)
@ -2430,7 +2432,7 @@ unittest
    // }
    assert(equal(splitter(a, 0), w));
    a = null;
-    assert(equal(splitter(a, 0), [ (int[]).init ][]));
+    assert(equal(splitter(a, 0),  (int[][]).init));
    a = [ 0 ];
    assert(equal(splitter(a, 0), [ (int[]).init, (int[]).init ][]));
    a = [ 0, 1 ];
@ -2717,32 +2719,48 @@ unittest
    assert(words.equal([ "i", "am", "pointing" ]));
 }

+///ditto
 auto splitter(alias isTerminator, Range)(Range input)
-if (is(typeof(unaryFun!(isTerminator)(ElementType!(Range).init))))
+if (isForwardRange!Range && is(typeof(unaryFun!isTerminator(input.front))))
 {
    return SplitterResult!(unaryFun!isTerminator, Range)(input);
 }

 private struct SplitterResult(alias isTerminator, Range)
 {
+    enum fullSlicing = (hasLength!Range && hasSlicing!Range) || isSomeString!Range;
+
    private Range _input;
-    private size_t _end;
+    private size_t _end = 0;
+    static if(!fullSlicing)
+        private Range _next;
+
+    private void findTerminator()
+    {
+        static if (fullSlicing)
+        {
+            auto r = find!isTerminator(_input.save);
+            _end = _input.length - r.length;
+        }
+        else
+            for ( _end = 0; !_next.empty ; _next.popFront)
+            {
+                if (isTerminator(_next.front))
+                    break;
+                ++_end;
+            }
+    }

    this(Range input)
    {
        _input = input;
-        if (_input.empty)
-        {
-            _end = _end.max;
-        }
+        static if(!fullSlicing)
+            _next = _input.save;
+
+        if (!_input.empty)
+            findTerminator();
        else
-        {
-            // Chase first terminator
-            while (_end < _input.length && !isTerminator(_input[_end]))
-            {
-                ++_end;
-            }
-        }
+            _end = size_t.max;
    }

    static if (isInfinite!Range)
@ -2753,61 +2771,56 @@ private struct SplitterResult(alias isTerminator, Range)
    {
        @property bool empty()
        {
-            return _end == _end.max;
+            return _end == size_t.max;
        }
    }

-    @property Range front()
+    @property auto front()
    {
-        assert(!empty);
+        version(assert) if (empty) throw new RangeError();
+        static if (fullSlicing)
            return _input[0 .. _end];
+        else
+            return _input.save.takeExactly(_end);
    }

    void popFront()
    {
-        assert(!empty);
-        if (_input.empty)
+        version(assert) if (empty) throw new RangeError();
+
+        static if (fullSlicing)
        {
-            _end = _end.max;
-            return;
-        }
-        // Skip over existing word
            _input = _input[_end .. _input.length];
-        // Skip terminator
-        for (;;)
-        {
            if (_input.empty)
            {
-                // Nothing following the terminator - done
-                _end = _end.max;
+                _end = size_t.max;
                return;
            }
-            if (!isTerminator(_input.front))
-            {
-                // Found a legit next field
-                break;
-            }
            _input.popFront();
        }
-        assert(!_input.empty && !isTerminator(_input.front));
-        // Prepare _end
-        _end = 1;
-        while (_end < _input.length && !isTerminator(_input[_end]))
+        else
        {
-            ++_end;
+            if (_next.empty)
+            {
+                _input = _next;
+                _end = size_t.max;
+                return;
            }
+            _next.popFront();
+            _input = _next.save;
+        }
+        findTerminator();
    }

-    static if (isForwardRange!Range)
-    {
    @property typeof(this) save()
    {
        auto ret = this;
        ret._input = _input.save;
+        static if (!fullSlicing)
+            ret._next = _next.save;
        return ret;
    }
 }
-}

 unittest
 {
@ -2826,22 +2839,18 @@ unittest
        writeln("unittest @", __FILE__, ":", __LINE__, " done.");
    void compare(string sentence, string[] witness)
    {
-        foreach (word; splitter!"a == ' '"(sentence))
-        {
-            assert(word == witness.front, word);
-            witness.popFront();
-        }
-        assert(witness.empty, witness[0]);
+        auto r = splitter!"a == ' '"(sentence);
+        assert(equal(r.save, witness), format("got: %(%s, %) expected: %(%s, %)", r, witness));
    }

    compare(" Mary  has a little lamb.   ",
-            ["", "Mary", "has", "a", "little", "lamb."]);
+            ["", "Mary", "", "has", "a", "little", "lamb.", "", "", ""]);
    compare("Mary  has a little lamb.   ",
-            ["Mary", "has", "a", "little", "lamb."]);
+            ["Mary", "", "has", "a", "little", "lamb.", "", "", ""]);
    compare("Mary  has a little lamb.",
-            ["Mary", "has", "a", "little", "lamb."]);
-    compare("", []);
-    compare(" ", [""]);
+            ["Mary", "", "has", "a", "little", "lamb."]);
+    compare("", (string[]).init);
+    compare(" ", ["", ""]);

    static assert(isForwardRange!(typeof(splitter!"a == ' '"("ABC"))));

@ -2857,10 +2866,45 @@ unittest
    }
 }

+unittest
+{
+    struct Entry
+    {
+        int low;
+        int high;
+        int[][] result;
+    }
+    Entry[] entries = [
+        Entry(0, 0, []),
+        Entry(0, 1, [[0]]),
+        Entry(1, 2, [[], []]),
+        Entry(2, 7, [[2], [4], [6]]),
+        Entry(1, 8, [[], [2], [4], [6], []]),
+    ];
+    foreach ( entry ; entries )
+    {
+        auto a = iota(entry.low, entry.high).filter!"true"();
+        auto b = splitter!"a%2"(a);
+        assert(equal!equal(b.save, entry.result), format("got: %(%s, %) expected: %(%s, %)", b, entry.result));
+    }
+}
+
+unittest
+{
+    //@@@6791@@@
+    assert(equal(std.array.splitter("là dove terminava quella valle"), ["là", "dove", "terminava", "quella", "valle"]));
+    assert(equal(splitter!(std.uni.isWhite)("là dove terminava quella valle"), ["là", "dove", "terminava", "quella", "valle"]));
+    assert(equal(splitter!"a=='本'"("日本語"), ["日", "語"]));
+}
+
+//@@@6730@@@ This exists already in std.array, so this declaration, at best, will only create ambiguity.
+//Unfortunately, an alias will conflict with the existing splitter in std.algorithm.
+//It needs to be removed.
+deprecated("Please use std.array.splitter for string specific splitting")
 auto splitter(Range)(Range input)
 if (isSomeString!Range)
 {
-    return splitter!(std.uni.isWhite)(input);
+    return std.array.splitter(input);
 }

 unittest
@ -2872,7 +2916,7 @@ unittest
    lines[1] = "line \ttwo".dup;
    lines[2] = "yah            last   line\ryah".dup;
    foreach (line; lines) {
-       foreach (word; splitter(std.string.strip(line))) {
+       foreach (word; std.array.splitter(std.string.strip(line))) {
            if (word in dictionary) continue; // Nothing to do
            auto newID = dictionary.length;
            dictionary[to!string(word)] = cast(uint)newID;
@ -2886,6 +2930,72 @@ unittest
    assert(dictionary["last"]== 4);
 }

+unittest
+{
+    // Check consistency:
+    // All flavors of split should produce the same results
+    foreach (input; [(int[]).init,
+                     [0],
+                     [0, 1, 0],
+                     [1, 1, 0, 0, 1, 1],
+                    ])
+    {
+        foreach (s; [0, 1])
+        {
+            auto result = split(input, s);
+
+            assert(equal(result, split(input, [s])), format(`"[%(%s,%)]"`, split(input, [s])));
+            //assert(equal(result, split(input, [s].filter!"true"())));                          //Not yet implemented
+            assert(equal(result, split!((a) => a == s)(input)), text(split!((a) => a == s)(input)));
+
+            //assert(equal!equal(result, split(input.filter!"true"(), s)));                      //Not yet implemented
+            //assert(equal!equal(result, split(input.filter!"true"(), [s])));                    //Not yet implemented
+            //assert(equal!equal(result, split(input.filter!"true"(), [s].filter!"true"())));    //Not yet implemented
+            assert(equal!equal(result, split!((a) => a == s)(input.filter!"true"())));
+
+            assert(equal(result, splitter(input, s)));
+            assert(equal(result, splitter(input, [s])));
+            //assert(equal(result, splitter(input, [s].filter!"true"())));                       //Not yet implemented
+            assert(equal(result, splitter!((a) => a == s)(input)));
+
+            //assert(equal!equal(result, splitter(input.filter!"true"(), s)));                   //Not yet implemented
+            //assert(equal!equal(result, splitter(input.filter!"true"(), [s])));                 //Not yet implemented
+            //assert(equal!equal(result, splitter(input.filter!"true"(), [s].filter!"true"()))); //Not yet implemented
+            assert(equal!equal(result, splitter!((a) => a == s)(input.filter!"true"())));
+        }
+    }
+    foreach (input; [string.init,
+                     " ",
+                     "  hello ",
+                     "hello   hello",
+                     " hello   what heck   this ?  "
+                    ])
+    {
+        foreach (s; [' ', 'h'])
+        {
+            auto result = split(input, s);
+
+            assert(equal(result, split(input, [s])));
+            //assert(equal(result, split(input, [s].filter!"true"())));                          //Not yet implemented
+            assert(equal(result, split!((a) => a == s)(input)));
+
+            //assert(equal!equal(result, split(input.filter!"true"(), s)));                      //Not yet implemented
+            //assert(equal!equal(result, split(input.filter!"true"(), [s])));                    //Not yet implemented
+            //assert(equal!equal(result, split(input.filter!"true"(), [s].filter!"true"())));    //Not yet implemented
+            assert(equal!equal(result, split!((a) => a == s)(input.filter!"true"())));
+
+            assert(equal(result, splitter(input, s)));
+            assert(equal(result, splitter(input, [s])));
+            //assert(equal(result, splitter(input, [s].filter!"true"())));                       //Not yet implemented
+            assert(equal(result, splitter!((a) => a == s)(input)));
+
+            //assert(equal!equal(result, splitter(input.filter!"true"(), s)));                   //Not yet implemented
+            //assert(equal!equal(result, splitter(input.filter!"true"(), [s])));                 //Not yet implemented
+            //assert(equal!equal(result, splitter(input.filter!"true"(), [s].filter!"true"()))); //Not yet implemented
+            assert(equal!equal(result, splitter!((a) => a == s)(input.filter!"true"())));
+        }
+    }
+}
 // joiner
 /**
 Lazily joins a range of ranges with a separator. The separator itself
--- a/std/array.d
+++ b/std/array.d
@ -13,8 +13,9 @@ Source: $(PHOBOSSRC std/_array.d)
 module std.array;

 import core.memory, core.bitop;
-import std.algorithm, std.ascii, std.conv, std.exception, std.range, std.string,
-       std.traits, std.typecons, std.typetuple, std.uni, std.utf;
+import std.algorithm, std.ascii, std.conv, std.exception, std.functional,
+       std.range, std.string, std.traits, std.typecons, std.typetuple,
+       std.uni, std.utf;
 import std.c.string : memcpy;
 version(unittest) import core.exception, std.stdio;

@ -1385,45 +1386,117 @@ unittest //safety, purity, ctfe ...
    assertCTFEable!dg;
 }

-/**
-Splits a string by whitespace.
- */
-auto splitter(C)(C[] s) @safe pure
-    if(isSomeString!(C[]))
+/++
+Lazily splits the string $(D s) into words, using whitespace as
+the delimiter.
+
+This function is string-specific and, unlike $(D
+splitter!(std.uni.isWhite)), merges runs of whitespace together
+(no empty tokens are produced).
+ +/
+auto splitter(C)(C[] s)
+if(isSomeChar!C)
 {
-    return std.algorithm.splitter!(std.uni.isWhite)(s);
+    static struct Result
+    {
+    private:
+        C[] _s;
+        size_t _frontLength;
+
+        void getFirst() pure @safe
+        {
+            auto r = find!(std.uni.isWhite)(_s);
+            _frontLength = _s.length - r.length;
+        }
+
+    public:
+        this(C[] s) pure @safe
+        {
+            _s = s.strip();
+            getFirst();
+        }
+
+        @property C[] front() pure @safe
+        {
+            version(assert) if (empty) throw new RangeError();
+            return _s[0 .. _frontLength];
+        }
+
+        void popFront() pure @safe
+        {
+            version(assert) if (empty) throw new RangeError();
+            _s = _s[_frontLength .. $].stripLeft();
+            getFirst();
+        }
+
+        @property bool empty() const pure nothrow @safe
+        {
+            return _s.empty;
+        }
+
+        @property inout(Result) save() inout pure nothrow @safe
+        {
+            return this;
+        }
+    }
+    return Result(s);
 }

 ///
@safe pure unittest
 {
    auto a = " a     bcd   ef gh ";
-    assert(equal(splitter(a), ["", "a", "bcd", "ef", "gh"][]));
+    assert(equal(splitter(a), ["a", "bcd", "ef", "gh"][]));
 }

-/*@safe*/ pure unittest
+@safe pure unittest
 {
    foreach(S; TypeTuple!(string, wstring, dstring))
    {
        S a = " a     bcd   ef gh ";
-        assert(equal(splitter(a), [to!S(""), to!S("a"), to!S("bcd"), to!S("ef"), to!S("gh")][]));
+        assert(equal(splitter(a), [to!S("a"), to!S("bcd"), to!S("ef"), to!S("gh")]));
        a = "";
        assert(splitter(a).empty);
    }
+
+    immutable string s = " a     bcd   ef gh ";
+    assert(equal(splitter(s), ["a", "bcd", "ef", "gh"][]));
 }

-/**************************************
- * Splits $(D s) into an array, using $(D delim) as the delimiter.
- */
-Unqual!(S1)[] split(S1, S2)(S1 s, S2 delim)
-if (isForwardRange!(Unqual!S1) && isForwardRange!S2)
+/++
+Eagerly splits $(D s) into an array, using $(D delim) as the delimiter.
+
+See also: $(XREF algorithm, splitter) for the lazy version of this function.
+ +/
+auto split(R, E)(R r, E delim)
+if (isForwardRange!R && is(typeof(ElementType!R.init == E.init)))
 {
-    Unqual!S1 us = s;
-    auto app = appender!(Unqual!(S1)[])();
-    foreach (word; std.algorithm.splitter(us, delim))
-    {
-        app.put(word);
+    auto spl = std.algorithm.splitter(r, delim);
+    alias S = typeof(spl.front.init); // "Slice_t"
+    auto app = appender!(S[])();
+    foreach (e; spl)
+        app.put(e);
+    return app.data;
 }
+auto split(R1, R2)(R1 r, R2 delim)
+if (isForwardRange!R1 && isForwardRange!R2 && is(typeof(ElementType!R1.init == ElementType!R2.init)))
+{
+    auto spl = std.algorithm.splitter(r, delim);
+    alias S = typeof(spl.front.init); // "Slice_t"
+    auto app = appender!(S[])();
+    foreach (e; spl)
+        app.put(e);
+    return app.data;
+}
+///ditto
+auto split(alias isTerminator, R)(R r)
+if (isForwardRange!R && is(typeof(unaryFun!isTerminator(r.front))))
+{
+    auto spl = std.algorithm.splitter!isTerminator(r);
+    alias S = typeof(spl.front.init); // "Slice_t"
+    auto app = appender!(S[])();
+    foreach (e; spl)
+        app.put(e);
    return app.data;
 }