Hackerpilot 2013-03-04 02:05:18 +00:00
commit c1fcef1873
2 changed files with 3368 additions and 3381 deletions


@@ -102,7 +102,7 @@
*
* Copyright: Brian Schott 2013
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
-* Authors: Brian Schott
+* Authors: Brian Schott, Dmitry Olshansky
* Source: $(PHOBOSSRC std/d/_lexer.d)
*/
@@ -203,7 +203,7 @@ enum IterationStyle
includeSpecialTokens = 0b0100,
/// Do not stop iteration on reaching the ___EOF__ token
ignoreEOF = 0b1000,
-/// Include everything
+/// Include _everything
everything = includeComments | includeWhitespace | ignoreEOF
}
@@ -266,7 +266,7 @@ struct LexerConfig
TokenStyle tokenStyle = tokenStyle.default_;
/**
-* Replacement for the ___VERSION__ token. Defaults to 1.
+* Replacement for the ___VERSION__ token. Defaults to 100.
*/
uint versionNumber = 100;
@@ -289,12 +289,6 @@ struct LexerConfig
* and error messsage.
*/
void delegate(string, size_t, uint, uint, string) errorFunc;
-/**
-* Initial size of the lexer's internal token buffer in bytes. The lexer
-* will grow this buffer if necessary.
-*/
-size_t bufferSize = 1024 * 4;
}
/**
@@ -331,287 +325,6 @@ auto byToken(R)(R range, LexerConfig config)
return r;
}
// For now a private helper that is tailored to the way the lexer works.
// It hides away the forwardness of the range by buffering;
// the RA version is a straightforward thin wrapper.
// At the moment it is byte-oriented.
private struct LexSource(R)
if(isForwardRange!R && !isRandomAccessRange!R)
{
bool empty() const { return _empty; }
auto ref front() const
{
return accum[accumIdx];
}
auto ref peek() const
in
{
assert (accumIdx + 1 < accum.length);
}
body
{
return accum[accumIdx + 1];
}
void popFront()
{
++_index;
range.popFront();
// if that was the last byte,
// just advance so that the right-open slice still works
accumIdx = (accumIdx+1) & mask;
if(range.empty)
{
_empty = true;
return;
}
if(accumIdx == savedAccumIdx)
{
// and move stuff around
auto oldLen = accum.length;
auto toCopy = oldLen - accumIdx;
accum.length *= 2; // keep pow of 2
// copy starting with last item
copy(retro(accum[accumIdx..oldLen]),
retro(accum[$-toCopy..$]));
savedAccumIdx = accum.length - toCopy;
}
accum[accumIdx] = range.front;
}
auto save()
{
typeof(this) copy = this;
copy.range = range.save;
// sadly need to dup circular buffer, as it overwrites items
copy.accum = copy.accum.dup;
return copy;
}
// mark a position to slice from later on
size_t mark()
{
savedAccumIdx = accumIdx;
return accumIdx;
}
// slice to current position from previously marked position
auto slice() @property
{
// it's an open right range as usual
return CircularRange(accum, savedAccumIdx, accumIdx);
}
size_t index() const @property
{
return _index;
}
private:
this(R src, size_t bufferSize)
{
range = src;
assert(bufferSize > 0);
assert((bufferSize & (bufferSize-1)) == 0); //is power of 2
accum = new ubyte[bufferSize];
if(range.empty)
_empty = true;
else
accum[accumIdx] = range.front; // load front
}
// a true RA-range of ubyte
struct CircularRange
{
this(ubyte[] buf, size_t s, size_t e)
{
assert((buffer.length & (buffer.length-1)) == 0);
buffer = buf;
start = s;
end = e;
}
//Forward range primitives
@property bool empty() const { return start == end; }
@property auto ref front() const { return buffer[start]; }
void popFront() { start = (start + 1) & mask; }
@property auto save() { return this; }
//Backwards is a bit slower, but should be rarely used (if at all)
@property ref back(){ return buffer[(end-1) & mask]; }
void popBack() { end = (end - 1) & mask; }
// RA range primitives
ref opIndex(size_t idx){ return buffer[(start+idx) & mask]; }
@property size_t length()
{
return end < start ? end + buffer.length -start : end - start;
}
alias length opDollar;
auto opSlice(size_t newStart, size_t newEnd)
{
size_t maskedStart = (start+newStart) & mask;
size_t maskedEnd = (start+newEnd) & mask;
return typeof(this)(buffer, maskedStart, maskedEnd);
}
// @@@bug fwd-ref in ldc0.10 (if placed above previous one)
auto opSlice(){ return opSlice(0, length); }
private:
@property auto mask(){ return buffer.length-1; }
size_t start, end;
ubyte[] buffer;
}
@property auto mask(){ return accum.length-1; }
R range;
bool _empty;
ubyte[] accum; // accumulator buffer for non-RA ranges
size_t savedAccumIdx;
size_t accumIdx; // current index in accumulator
size_t _index; // index of current element in original range
}
// TODO: make sure it's RandomAccess later
/*static assert(isRandomAccessRange!(
LexSource!(typeof(filter!"true"(cast(ubyte[])null)))
.CircularRange)
);*/
//trivial pass-through for RA ranges
private struct LexSource(R)
if(isRandomAccessRange!R)
{
bool empty() const @property { return cur >= range.length; }
bool canPeek() const { return cur + 1 < range.length; }
auto ref front() const @property { return range[cur]; }
void popFront(){ cur++; }
auto ref peek() const
in
{
assert (canPeek());
}
body
{
return range[cur + 1];
}
auto save()
{
typeof(this) copy = this;
copy.range = range.save;
return copy;
}
auto mark()
{
saved = cur;
}
// use the underlying range's slicing capability
auto slice() @property
{
return range[saved..cur];
}
size_t index() const @property
{
return cur;
}
private:
this(R src)
{
range = src;
}
size_t cur, saved;
R range;
}
auto lexerSource(Range)(Range range, size_t bufSize=8)
if(isForwardRange!Range && !isRandomAccessRange!Range
&& is(ElementType!Range : const(ubyte)))
{
return LexSource!(Range)(range, bufSize);
}
auto lexerSource(Range)(Range range)
if(isRandomAccessRange!Range
&& is(ElementType!Range : const(ubyte)))
{
return LexSource!(Range)(range);
}
unittest
{
// test the basic functionality of a "mark-slice" range
import std.string, std.stdio;
static void test_hello(T)(T lexs)
{
assert(lexs.front == 'H');
lexs.popFront();
assert(lexs.front == 'e');
foreach(i; 0..2)
{
auto saved = lexs.save;
lexs.mark();
assert(lexs.slice.equal(""));
lexs.popFront();
assert(lexs.slice.equal("e"), text(cast(char)lexs.front));
lexs.popFrontN(4);
auto bytes = lexs.slice.map!"cast(char)a".array();
assert(bytes.equal("ello,"), bytes.to!string);
lexs.mark();
assert(lexs.slice.equal(""));
assert(lexs.front == 'w');
lexs.popFrontN(6);
assert(lexs.empty);
auto s = lexs.slice();
auto msg = s.save.map!"cast(char)a".array;
assert(s[].equal("world!"), msg);
assert(s[2..$-1].equal("rld"), msg);
assert(s[0] == 'w' && s[$-1] == '!');
s.popFront();
assert(s.front == 'o' && s.back == '!');
s.popBack();
assert(s.front == 'o' && s.back == 'd');
//restore and repeat again
lexs = saved;
}
}
static void test_empty(T)(T lexs)
{
assert(lexs.empty);
lexs.mark();
assert(lexs.slice().equal(""));
}
auto fwdLex = lexerSource(
"Hello, world!"
.representation
.filter!"a != ' '", 16 // and the one that is more then enough
);
test_hello(fwdLex);
fwdLex = lexerSource(
"Hello, world!"
.representation
.filter!"a != ' '", 1 // try the smallest initial buffer
);
test_hello(fwdLex);
fwdLex = lexerSource("".representation.filter!"a != ' '");
auto raLex = lexerSource("".representation);
test_empty(raLex);
test_empty(fwdLex);
raLex = lexerSource("Hello,world!".representation);
test_hello(raLex);
}
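Editorial aside, not part of this commit: a rough sketch of how the byToken entry point and the LexerConfig fields seen earlier in this diff fit together. The error delegate's parameter names and the token fields referenced in the comments are assumptions based on the surrounding code.
unittest
{
    import std.string : representation;
    LexerConfig config;                  // versionNumber already defaults to 100
    config.errorFunc = delegate(string fileName, size_t index, uint line, uint col, string message)
    {
        // collect or log lexing errors here instead of aborting
    };
    foreach (t; byToken("int x = 1;".representation, config))
    {
        // each token is assumed to carry at least a type (TokenType) and a value string
    }
}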
/**
* Range of tokens. Use byToken$(LPAREN)$(RPAREN) to instantiate.
*/
@@ -718,10 +431,10 @@ L_advance:
"=", "TokenType.assign",
"@", "TokenType.at",
"&", "TokenType.bitAnd",
-"&=", "TokenType.bitAndEqual",
+"&=", "TokenType.bitAndEquals",
"|", "TokenType.bitOr",
-"|=", "TokenType.bitOrEqual",
+"|=", "TokenType.bitOrEquals",
-"~=", "TokenType.catEqual",
+"~=", "TokenType.catEquals",
":", "TokenType.colon",
",", "TokenType.comma",
"--", "TokenType.decrement",
@@ -741,21 +454,21 @@ L_advance:
"||", "TokenType.logicOr",
"(", "TokenType.lParen",
"-", "TokenType.minus",
-"-=", "TokenType.minusEqual",
+"-=", "TokenType.minusEquals",
"%", "TokenType.mod",
-"%=", "TokenType.modEqual",
+"%=", "TokenType.modEquals",
-"*=", "TokenType.mulEqual",
+"*=", "TokenType.mulEquals",
"!", "TokenType.not",
-"!=", "TokenType.notEqual",
+"!=", "TokenType.notEquals",
"!>", "TokenType.notGreater",
"!>=", "TokenType.notGreaterEqual",
"!<", "TokenType.notLess",
"!<=", "TokenType.notLessEqual",
"!<>", "TokenType.notLessEqualGreater",
"+", "TokenType.plus",
-"+=", "TokenType.plusEqual",
+"+=", "TokenType.plusEquals",
"^^", "TokenType.pow",
-"^^=", "TokenType.powEqual",
+"^^=", "TokenType.powEquals",
"}", "TokenType.rBrace",
"]", "TokenType.rBracket",
")", "TokenType.rParen",
@@ -771,7 +484,7 @@ L_advance:
">>>", "TokenType.unsignedShiftRight",
">>>=", "TokenType.unsignedShiftRightEqual",
"^", "TokenType.xor",
-"^=", "TokenType.xorEqual",
+"^=", "TokenType.xorEquals",
));
case '/':
nextCharNonLF();
@@ -792,7 +505,7 @@ L_advance:
goto L_advance; // tail-recursion
case '=':
-current.type = TokenType.divEqual;
+current.type = TokenType.divEquals;
current.value = "/=";
src.popFront();
return;
@@ -2168,7 +1881,7 @@ L_advance:
*/
pure nothrow bool isOperator(const TokenType t)
{
-return t >= TokenType.assign && t <= TokenType.xorEquals;
+return t >= TokenType.assign && t <= TokenType.xorEquals;
}
/**
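Editorial aside, not part of this commit: the range check in isOperator is only valid because the operator members of TokenType are declared contiguously from assign through xorEquals (see the enum below). A minimal sketch of that invariant:
unittest
{
    // the operator members form one contiguous run, assign .. xorEquals
    assert(isOperator(TokenType.assign));
    assert(isOperator(TokenType.plusEquals));
    assert(isOperator(TokenType.xorEquals));
    // bool_ is declared immediately after xorEquals, so it falls outside the run
    assert(!isOperator(TokenType.bool_));
}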
@@ -2332,15 +2045,15 @@ enum TokenType: ushort
assign, /// =
at, /// @
bitAnd, /// &
-bitAndEqual, /// &=
+bitAndEquals, /// &=
bitOr, /// |
-bitOrEqual, /// |=
+bitOrEquals, /// |=
-catEqual, /// ~=
+catEquals, /// ~=
colon, /// :
comma, /// ,
decrement, /// --
div, /// /
-divEqual, /// /=
+divEquals, /// /=
dollar, /// $
dot, /// .
equals, /// ==
@@ -2359,21 +2072,21 @@ enum TokenType: ushort
logicOr, /// ||
lParen, /// $(LPAREN)
minus, /// -
-minusEqual, /// -=
+minusEquals, /// -=
mod, /// %
-modEqual, /// %=
+modEquals, /// %=
-mulEqual, /// *=
+mulEquals, /// *=
not, /// !
-notEqual, /// !=
+notEquals, /// !=
notGreater, /// !>
notGreaterEqual, /// !>=
notLess, /// !<
notLessEqual, /// !<=
notLessEqualGreater, /// !<>
plus, /// +
-plusEqual, /// +=
+plusEquals, /// +=
pow, /// ^^
-powEqual, /// ^^=
+powEquals, /// ^^=
rBrace, /// }
rBracket, /// ]
rParen, /// $(RPAREN)
@@ -2391,7 +2104,7 @@ enum TokenType: ushort
unsignedShiftRightEqual, /// >>>=
vararg, /// ...
xor, /// ^
-xorEqual, /// ^=
+xorEquals, /// ^=
bool_, /// $(D_KEYWORD bool)
byte_, /// $(D_KEYWORD byte)
@@ -2401,7 +2114,6 @@ enum TokenType: ushort
char_, /// $(D_KEYWORD char)
creal_, /// $(D_KEYWORD creal)
dchar_, /// $(D_KEYWORD dchar)
-delegate_, /// $(D_KEYWORD delegate)
double_, /// $(D_KEYWORD double)
float_, /// $(D_KEYWORD float)
function_, /// $(D_KEYWORD function)
@@ -2453,6 +2165,7 @@ enum TokenType: ushort
continue_, /// $(D_KEYWORD continue)
debug_, /// $(D_KEYWORD debug)
default_, /// $(D_KEYWORD default)
+delegate_, /// $(D_KEYWORD delegate)
delete_, /// $(D_KEYWORD delete)
do_, /// $(D_KEYWORD do)
else_, /// $(D_KEYWORD else)
@@ -2529,22 +2242,298 @@ enum TokenType: ushort
dstringLiteral, /// $(D_STRING "32-bit character string"d)
stringLiteral, /// $(D_STRING "an 8-bit string")
wstringLiteral, /// $(D_STRING "16-bit character string"w)
+invalid, /// Not a valid token type
}
// Implementation details follow
private:
// For now a private helper that is tailored to the way the lexer works.
// It hides away the forwardness of the range by buffering;
// the RA version is a straightforward thin wrapper.
// At the moment it is byte-oriented.
private struct LexSource(R)
if(isForwardRange!R && !isRandomAccessRange!R)
{
bool empty() const { return _empty; }
auto ref front() const
{
return accum[accumIdx];
}
auto ref peek() const
in
{
assert (accumIdx + 1 < accum.length);
}
body
{
return accum[accumIdx + 1];
}
void popFront()
{
++_index;
range.popFront();
// if that was the last byte,
// just advance so that the right-open slice still works
accumIdx = (accumIdx+1) & mask;
if(range.empty)
{
_empty = true;
return;
}
if(accumIdx == savedAccumIdx)
{
// and move stuff around
auto oldLen = accum.length;
auto toCopy = oldLen - accumIdx;
accum.length *= 2; // keep pow of 2
// copy starting with last item
copy(retro(accum[accumIdx..oldLen]),
retro(accum[$-toCopy..$]));
savedAccumIdx = accum.length - toCopy;
}
accum[accumIdx] = range.front;
}
auto save()
{
typeof(this) copy = this;
copy.range = range.save;
// sadly need to dup circular buffer, as it overwrites items
copy.accum = copy.accum.dup;
return copy;
}
// mark a position to slice from later on
size_t mark()
{
savedAccumIdx = accumIdx;
return accumIdx;
}
// slice to current position from previously marked position
auto slice() @property
{
// it's an open right range as usual
return CircularRange(accum, savedAccumIdx, accumIdx);
}
size_t index() const @property
{
return _index;
}
private:
this(R src, size_t bufferSize)
{
range = src;
assert(bufferSize > 0);
assert((bufferSize & (bufferSize-1)) == 0); //is power of 2
accum = new ubyte[bufferSize];
if(range.empty)
_empty = true;
else
accum[accumIdx] = range.front; // load front
}
// a true RA-range of ubyte
struct CircularRange
{
this(ubyte[] buf, size_t s, size_t e)
{
assert((buffer.length & (buffer.length-1)) == 0);
buffer = buf;
start = s;
end = e;
}
//Forward range primitives
@property bool empty() const { return start == end; }
@property auto ref front() const { return buffer[start]; }
void popFront() { start = (start + 1) & mask; }
@property auto save() { return this; }
//Backwards is a bit slower, but should be rarely used (if at all)
@property ref back(){ return buffer[(end-1) & mask]; }
void popBack() { end = (end - 1) & mask; }
// RA range primitives
ref opIndex(size_t idx){ return buffer[(start+idx) & mask]; }
@property size_t length()
{
return end < start ? end + buffer.length -start : end - start;
}
alias length opDollar;
auto opSlice(size_t newStart, size_t newEnd)
{
size_t maskedStart = (start+newStart) & mask;
size_t maskedEnd = (start+newEnd) & mask;
return typeof(this)(buffer, maskedStart, maskedEnd);
}
// @@@bug fwd-ref in ldc0.10 (if placed above previous one)
auto opSlice(){ return opSlice(0, length); }
private:
@property auto mask(){ return buffer.length-1; }
size_t start, end;
ubyte[] buffer;
}
@property auto mask(){ return accum.length-1; }
R range;
bool _empty;
ubyte[] accum; // accumulator buffer for non-RA ranges
size_t savedAccumIdx;
size_t accumIdx; // current index in accumulator
size_t _index; // index of current element in original range
}
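Editorial aside, not part of this commit: the buffer above grows by doubling and wraps its indices with mask = length - 1, which is why the constructor asserts a power-of-two size. A minimal sketch of why the masking works:
unittest
{
    enum len = 8;              // any power of two, like the lexer's buffer sizes
    enum mask = len - 1;
    foreach (i; 0 .. 3 * len)
        assert((i & mask) == (i % len)); // masking equals modulo only for power-of-two lengths
}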
// TODO: make sure it's RandomAccess later
/*static assert(isRandomAccessRange!(
LexSource!(typeof(filter!"true"(cast(ubyte[])null)))
.CircularRange)
);*/
//trivial pass-through for RA ranges
private struct LexSource(R)
if(isRandomAccessRange!R)
{
bool empty() const @property { return cur >= range.length; }
bool canPeek() const { return cur + 1 < range.length; }
auto ref front() const @property { return range[cur]; }
void popFront(){ cur++; }
auto ref peek() const
in
{
assert (canPeek());
}
body
{
return range[cur + 1];
}
auto save()
{
typeof(this) copy = this;
copy.range = range.save;
return copy;
}
auto mark()
{
saved = cur;
}
// use the underlying range's slicing capability
auto slice() @property
{
return range[saved..cur];
}
size_t index() const @property
{
return cur;
}
private:
this(R src)
{
range = src;
}
size_t cur, saved;
R range;
}
auto lexerSource(Range)(Range range, size_t bufSize=8)
if(isForwardRange!Range && !isRandomAccessRange!Range
&& is(ElementType!Range : const(ubyte)))
{
return LexSource!(Range)(range, bufSize);
}
auto lexerSource(Range)(Range range)
if(isRandomAccessRange!Range
&& is(ElementType!Range : const(ubyte)))
{
return LexSource!(Range)(range);
}
unittest
{
// test the basic functionality of a "mark-slice" range
import std.string, std.stdio;
static void test_hello(T)(T lexs)
{
assert(lexs.front == 'H');
lexs.popFront();
assert(lexs.front == 'e');
foreach(i; 0..2)
{
auto saved = lexs.save;
lexs.mark();
assert(lexs.slice.equal(""));
lexs.popFront();
assert(lexs.slice.equal("e"), text(cast(char)lexs.front));
lexs.popFrontN(4);
auto bytes = lexs.slice.map!"cast(char)a".array();
assert(bytes.equal("ello,"), bytes.to!string);
lexs.mark();
assert(lexs.slice.equal(""));
assert(lexs.front == 'w');
lexs.popFrontN(6);
assert(lexs.empty);
auto s = lexs.slice();
auto msg = s.save.map!"cast(char)a".array;
assert(s[].equal("world!"), msg);
assert(s[2..$-1].equal("rld"), msg);
assert(s[0] == 'w' && s[$-1] == '!');
s.popFront();
assert(s.front == 'o' && s.back == '!');
s.popBack();
assert(s.front == 'o' && s.back == 'd');
//restore and repeat again
lexs = saved;
}
}
static void test_empty(T)(T lexs)
{
assert(lexs.empty);
lexs.mark();
assert(lexs.slice().equal(""));
}
auto fwdLex = lexerSource(
"Hello, world!"
.representation
.filter!"a != ' '", 16 // and the one that is more then enough
);
test_hello(fwdLex);
fwdLex = lexerSource(
"Hello, world!"
.representation
.filter!"a != ' '", 1 // try the smallest initial buffer
);
test_hello(fwdLex);
fwdLex = lexerSource("".representation.filter!"a != ' '");
auto raLex = lexerSource("".representation);
test_empty(raLex);
test_empty(fwdLex);
raLex = lexerSource("Hello,world!".representation);
test_hello(raLex);
}
// uses auto-detection for pure, safe nothrow
bool isRangeEoF(R)(ref R range)
{
return range.empty || range.front == 0 || range.front == 0x1a;
}
-/*
-* Slices of the above string to save memory. This array is automatically
-* generated.
-*/
+// Lookup table for token values
immutable(string[TokenType.max + 1]) tokenValues = [
"=",
"@",
@@ -2617,7 +2606,6 @@ immutable(string[TokenType.max + 1]) tokenValues = [
"char",
"creal",
"dchar",
-"delegate",
"double",
"float",
"function",
@@ -2667,6 +2655,7 @@ immutable(string[TokenType.max + 1]) tokenValues = [
"continue",
"debug",
"default",
+"delegate",
"delete",
"do",
"else",
@@ -2742,7 +2731,6 @@ immutable(string[TokenType.max + 1]) tokenValues = [
null,
null,
null,
-null,
];
pure string getTokenValue(const TokenType type)
@@ -3396,5 +3384,4 @@ unittest
assert (tokenCount == 16);
}
//void main(string[] args){}
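Editorial aside, not part of this commit: a sketch of what the relabelled lookup table provides, assuming getTokenValue simply indexes tokenValues by the token type, so operators and keywords yield their spelling while literal and identifier types yield null.
unittest
{
    assert(getTokenValue(TokenType.plusEquals) == "+=");
    assert(getTokenValue(TokenType.default_) == "default");
    assert(getTokenValue(TokenType.stringLiteral) is null); // literals carry their own text
}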