commit c1fcef1873 (merged)

 std/d/lexer.d | 631
@@ -102,7 +102,7 @@
  *
  * Copyright: Brian Schott 2013
  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
- * Authors: Brian Schott
+ * Authors: Brian Schott, Dmitry Olshansky
  * Source: $(PHOBOSSRC std/d/_lexer.d)
  */
 
@@ -203,7 +203,7 @@ enum IterationStyle
     includeSpecialTokens = 0b0100,
     /// Do not stop iteration on reaching the ___EOF__ token
     ignoreEOF = 0b1000,
-    /// Include everything
+    /// Include _everything
     everything = includeComments | includeWhitespace | ignoreEOF
 }
 
@@ -266,7 +266,7 @@ struct LexerConfig
     TokenStyle tokenStyle = tokenStyle.default_;
 
     /**
-     * Replacement for the ___VERSION__ token. Defaults to 1.
+     * Replacement for the ___VERSION__ token. Defaults to 100.
     */
    uint versionNumber = 100;
 
@@ -289,12 +289,6 @@ struct LexerConfig
     * and error messsage.
     */
    void delegate(string, size_t, uint, uint, string) errorFunc;
 
-    /**
-     * Initial size of the lexer's internal token buffer in bytes. The lexer
-     * will grow this buffer if necessary.
-     */
-    size_t bufferSize = 1024 * 4;
 }
 
 /**
@@ -331,287 +325,6 @@ auto byToken(R)(R range, LexerConfig config)
     return r;
 }
 
-// For now a private helper that is tailored to the way lexer works
-// hides away forwardness of range by buffering
-// RA-version is strightforward thin wrapping
-// ATM it is byte-oriented
-private struct LexSource(R)
-    if(isForwardRange!R && !isRandomAccessRange!R)
-{
-    bool empty() const { return _empty; }
-
-    auto ref front() const
-    {
-        return accum[accumIdx];
-    }
-
-    auto ref peek() const
-    in
-    {
-        assert (accumIdx + 1 < accum.length);
-    }
-    body
-    {
-        return accum[accumIdx + 1];
-    }
-
-    void popFront()
-    {
-        ++_index;
-        range.popFront();
-        // if that was last byte
-        // just advance so that open-righted slice just works
-        accumIdx = (accumIdx+1) & mask;
-        if(range.empty)
-        {
-            _empty = true;
-            return;
-        }
-        if(accumIdx == savedAccumIdx)
-        {
-            // and move stuff around
-            auto oldLen = accum.length;
-            auto toCopy = oldLen - accumIdx;
-            accum.length *= 2; // keep pow of 2
-            // copy starting with last item
-            copy(retro(accum[accumIdx..oldLen]),
-                retro(accum[$-toCopy..$]));
-            savedAccumIdx = accum.length - toCopy;
-        }
-        accum[accumIdx] = range.front;
-    }
-
-    auto save()
-    {
-        typeof(this) copy = this;
-        copy.range = range.save;
-        // sadly need to dup circular buffer, as it overwrites items
-        copy.accum = copy.accum.dup;
-        return copy;
-    }
-
-    // mark a position to slice from later on
-    size_t mark()
-    {
-        savedAccumIdx = accumIdx;
-        return accumIdx;
-    }
-
-    // slice to current position from previously marked position
-    auto slice() @property
-    {
-        // it's an open right range as usual
-        return CircularRange(accum, savedAccumIdx, accumIdx);
-    }
-
-    size_t index() const @property
-    {
-        return _index;
-    }
-
-private:
-    this(R src, size_t bufferSize)
-    {
-        range = src;
-        assert(bufferSize > 0);
-        assert((bufferSize & (bufferSize-1)) == 0); //is power of 2
-        accum = new ubyte[bufferSize];
-        if(range.empty)
-            _empty = true;
-        else
-            accum[accumIdx] = range.front; // load front
-    }
-
-    // a true RA-range of ubyte
-    struct CircularRange
-    {
-        this(ubyte[] buf, size_t s, size_t e)
-        {
-            assert((buffer.length & (buffer.length-1)) == 0);
-            buffer = buf;
-            start = s;
-            end = e;
-        }
-        //Forward range primitives
-        @property bool empty() const { return start == end; }
-        @property auto ref front() const { return buffer[start]; }
-        void popFront() { start = (start + 1) & mask; }
-        @property auto save() { return this; }
-
-        //Backwards is a bit slower, but should be rarely used (if at all)
-        @property ref back(){ return buffer[(end-1) & mask]; }
-        void popBack() { end = (end - 1) & mask; }
-
-        // RA range primitives
-        ref opIndex(size_t idx){ return buffer[(start+idx) & mask]; }
-        @property size_t length()
-        {
-            return end < start ? end + buffer.length -start : end - start;
-        }
-        alias length opDollar;
-
-        auto opSlice(size_t newStart, size_t newEnd)
-        {
-            size_t maskedStart = (start+newStart) & mask;
-            size_t maskedEnd = (start+newEnd) & mask;
-            return typeof(this)(buffer, maskedStart, maskedEnd);
-        }
-        // @@@bug fwd-ref in ldc0.10 (if placed above previous one)
-        auto opSlice(){ return opSlice(0, length); }
-    private:
-        @property auto mask(){ return buffer.length-1; }
-        size_t start, end;
-        ubyte[] buffer;
-    }
-
-    @property auto mask(){ return accum.length-1; }
-
-    R range;
-    bool _empty;
-    ubyte[] accum; // accumulator buffer for non-RA ranges
-    size_t savedAccumIdx;
-    size_t accumIdx; // current index in accumulator
-    size_t _index; // index of current element in original range
-}
-
-// TODO: make sure it's RandomAccess later
-/*static assert(isRandomAccessRange!(
-    LexSource!(typeof(filter!"true"(cast(ubyte[])null)))
-    .CircularRange)
-);*/
-
-//trivial pass-through for RA ranges
-private struct LexSource(R)
-    if(isRandomAccessRange!R)
-{
-    bool empty() const @property { return cur >= range.length; }
-    bool canPeek() const { return cur + 1 < range.length; }
-    auto ref front() const @property { return range[cur]; }
-    void popFront(){ cur++; }
-
-    auto ref peek() const
-    in
-    {
-        assert (canPeek());
-    }
-    body
-    {
-        return range[cur + 1];
-    }
-
-    auto save()
-    {
-        typeof(this) copy = this;
-        copy.range = range.save;
-        return copy;
-    }
-
-    auto mark()
-    {
-        saved = cur;
-    }
-
-    // use the underliying range slicing capability
-    auto slice() @property
-    {
-        return range[saved..cur];
-    }
-
-    size_t index() const @property
-    {
-        return cur;
-    }
-
-private:
-    this(R src)
-    {
-        range = src;
-    }
-    size_t cur, saved;
-    R range;
-}
-
-auto lexerSource(Range)(Range range, size_t bufSize=8)
-    if(isForwardRange!Range && !isRandomAccessRange!Range
-    && is(ElementType!Range : const(ubyte)))
-{
-    return LexSource!(Range)(range, bufSize);
-}
-
-auto lexerSource(Range)(Range range)
-    if(isRandomAccessRange!Range
-    && is(ElementType!Range : const(ubyte)))
-{
-    return LexSource!(Range)(range);
-}
-
-unittest
-{
-    // test the basic functionality of a "mark-slice" range
-    import std.string, std.stdio;
-
-    static void test_hello(T)(T lexs)
-    {
-        assert(lexs.front == 'H');
-        lexs.popFront();
-        assert(lexs.front == 'e');
-        foreach(i; 0..2)
-        {
-            auto saved = lexs.save;
-            lexs.mark();
-            assert(lexs.slice.equal(""));
-            lexs.popFront();
-            assert(lexs.slice.equal("e"), text(cast(char)lexs.front));
-            lexs.popFrontN(4);
-            auto bytes = lexs.slice.map!"cast(char)a".array();
-            assert(bytes.equal("ello,"), bytes.to!string);
-            lexs.mark();
-            assert(lexs.slice.equal(""));
-            assert(lexs.front == 'w');
-            lexs.popFrontN(6);
-            assert(lexs.empty);
-            auto s = lexs.slice();
-            auto msg = s.save.map!"cast(char)a".array;
-            assert(s[].equal("world!"), msg);
-            assert(s[2..$-1].equal("rld"), msg);
-            assert(s[0] == 'w' && s[$-1] == '!');
-            s.popFront();
-            assert(s.front == 'o' && s.back == '!');
-            s.popBack();
-            assert(s.front == 'o' && s.back == 'd');
-            //restore and repeat again
-            lexs = saved;
-        }
-    }
-
-    static void test_empty(T)(T lexs)
-    {
-        assert(lexs.empty);
-        lexs.mark();
-        assert(lexs.slice().equal(""));
-    }
-
-    auto fwdLex = lexerSource(
-        "Hello, world!"
-        .representation
-        .filter!"a != ' '", 16 // and the one that is more then enough
-    );
-    test_hello(fwdLex);
-    fwdLex = lexerSource(
-        "Hello, world!"
-        .representation
-        .filter!"a != ' '", 1 // try the smallest initial buffer
-    );
-    test_hello(fwdLex);
-    fwdLex = lexerSource("".representation.filter!"a != ' '");
-    auto raLex = lexerSource("".representation);
-    test_empty(raLex);
-    test_empty(fwdLex);
-    raLex = lexerSource("Hello,world!".representation);
-    test_hello(raLex);
-}
-
 /**
  * Range of tokens. Use byToken$(LPAREN)$(RPAREN) to instantiate.
  */
@@ -718,10 +431,10 @@ L_advance:
             "=", "TokenType.assign",
             "@", "TokenType.at",
             "&", "TokenType.bitAnd",
-            "&=", "TokenType.bitAndEqual",
+            "&=", "TokenType.bitAndEquals",
             "|", "TokenType.bitOr",
-            "|=", "TokenType.bitOrEqual",
-            "~=", "TokenType.catEqual",
+            "|=", "TokenType.bitOrEquals",
+            "~=", "TokenType.catEquals",
             ":", "TokenType.colon",
             ",", "TokenType.comma",
             "--", "TokenType.decrement",
@@ -741,21 +454,21 @@ L_advance:
             "||", "TokenType.logicOr",
             "(", "TokenType.lParen",
             "-", "TokenType.minus",
-            "-=", "TokenType.minusEqual",
+            "-=", "TokenType.minusEquals",
             "%", "TokenType.mod",
-            "%=", "TokenType.modEqual",
-            "*=", "TokenType.mulEqual",
+            "%=", "TokenType.modEquals",
+            "*=", "TokenType.mulEquals",
             "!", "TokenType.not",
-            "!=", "TokenType.notEqual",
+            "!=", "TokenType.notEquals",
             "!>", "TokenType.notGreater",
             "!>=", "TokenType.notGreaterEqual",
             "!<", "TokenType.notLess",
             "!<=", "TokenType.notLessEqual",
             "!<>", "TokenType.notLessEqualGreater",
             "+", "TokenType.plus",
-            "+=", "TokenType.plusEqual",
+            "+=", "TokenType.plusEquals",
             "^^", "TokenType.pow",
-            "^^=", "TokenType.powEqual",
+            "^^=", "TokenType.powEquals",
             "}", "TokenType.rBrace",
             "]", "TokenType.rBracket",
             ")", "TokenType.rParen",
@@ -771,7 +484,7 @@ L_advance:
             ">>>", "TokenType.unsignedShiftRight",
             ">>>=", "TokenType.unsignedShiftRightEqual",
             "^", "TokenType.xor",
-            "^=", "TokenType.xorEqual",
+            "^=", "TokenType.xorEquals",
         ));
     case '/':
         nextCharNonLF();
@@ -792,7 +505,7 @@ L_advance:
             goto L_advance; // tail-recursion
 
         case '=':
-            current.type = TokenType.divEqual;
+            current.type = TokenType.divEquals;
             current.value = "/=";
             src.popFront();
             return;
@@ -2168,7 +1881,7 @@ L_advance:
  */
 pure nothrow bool isOperator(const TokenType t)
 {
-    return t >= TokenType.assign && t <= TokenType.xorEqual;
+    return t >= TokenType.assign && t <= TokenType.xorEquals;
 }
 
 /**
@@ -2332,15 +2045,15 @@ enum TokenType: ushort
     assign, /// =
     at, /// @
     bitAnd, /// &
-    bitAndEqual, /// &=
+    bitAndEquals, /// &=
     bitOr, /// |
-    bitOrEqual, /// |=
-    catEqual, /// ~=
+    bitOrEquals, /// |=
+    catEquals, /// ~=
     colon, /// :
     comma, /// ,
     decrement, /// --
     div, /// /
-    divEqual, /// /=
+    divEquals, /// /=
     dollar, /// $
     dot, /// .
     equals, /// ==
@@ -2359,21 +2072,21 @@ enum TokenType: ushort
     logicOr, /// ||
     lParen, /// $(LPAREN)
     minus, /// -
-    minusEqual, /// -=
+    minusEquals, /// -=
     mod, /// %
-    modEqual, /// %=
-    mulEqual, /// *=
+    modEquals, /// %=
+    mulEquals, /// *=
     not, /// !
-    notEqual, /// !=
+    notEquals, /// !=
     notGreater, /// !>
     notGreaterEqual, /// !>=
     notLess, /// !<
     notLessEqual, /// !<=
     notLessEqualGreater, /// !<>
     plus, /// +
-    plusEqual, /// +=
+    plusEquals, /// +=
     pow, /// ^^
-    powEqual, /// ^^=
+    powEquals, /// ^^=
     rBrace, /// }
     rBracket, /// ]
     rParen, /// $(RPAREN)
@@ -2391,7 +2104,7 @@ enum TokenType: ushort
     unsignedShiftRightEqual, /// >>>=
     vararg, /// ...
     xor, /// ^
-    xorEqual, /// ^=
+    xorEquals, /// ^=
 
     bool_, /// $(D_KEYWORD bool)
     byte_, /// $(D_KEYWORD byte)
@@ -2401,7 +2114,6 @@ enum TokenType: ushort
     char_, /// $(D_KEYWORD char)
     creal_, /// $(D_KEYWORD creal)
     dchar_, /// $(D_KEYWORD dchar)
-    delegate_, /// $(D_KEYWORD delegate)
     double_, /// $(D_KEYWORD double)
     float_, /// $(D_KEYWORD float)
     function_, /// $(D_KEYWORD function)
@@ -2453,6 +2165,7 @@ enum TokenType: ushort
     continue_, /// $(D_KEYWORD continue)
     debug_, /// $(D_KEYWORD debug)
     default_, /// $(D_KEYWORD default)
+    delegate_, /// $(D_KEYWORD delegate)
     delete_, /// $(D_KEYWORD delete)
     do_, /// $(D_KEYWORD do)
     else_, /// $(D_KEYWORD else)
@@ -2529,22 +2242,298 @@ enum TokenType: ushort
     dstringLiteral, /// $(D_STRING "32-bit character string"d)
     stringLiteral, /// $(D_STRING "an 8-bit string")
     wstringLiteral, /// $(D_STRING "16-bit character string"w)
-    invalid, /// Not a valid token type
 }
 
 // Implementation details follow
 private:
 
+// For now a private helper that is tailored to the way lexer works
+// hides away forwardness of range by buffering
+// RA-version is strightforward thin wrapping
+// ATM it is byte-oriented
+private struct LexSource(R)
+    if(isForwardRange!R && !isRandomAccessRange!R)
+{
+    bool empty() const { return _empty; }
+
+    auto ref front() const
+    {
+        return accum[accumIdx];
+    }
+
+    auto ref peek() const
+    in
+    {
+        assert (accumIdx + 1 < accum.length);
+    }
+    body
+    {
+        return accum[accumIdx + 1];
+    }
+
+    void popFront()
+    {
+        ++_index;
+        range.popFront();
+        // if that was last byte
+        // just advance so that open-righted slice just works
+        accumIdx = (accumIdx+1) & mask;
+        if(range.empty)
+        {
+            _empty = true;
+            return;
+        }
+        if(accumIdx == savedAccumIdx)
+        {
+            // and move stuff around
+            auto oldLen = accum.length;
+            auto toCopy = oldLen - accumIdx;
+            accum.length *= 2; // keep pow of 2
+            // copy starting with last item
+            copy(retro(accum[accumIdx..oldLen]),
+                retro(accum[$-toCopy..$]));
+            savedAccumIdx = accum.length - toCopy;
+        }
+        accum[accumIdx] = range.front;
+    }
+
+    auto save()
+    {
+        typeof(this) copy = this;
+        copy.range = range.save;
+        // sadly need to dup circular buffer, as it overwrites items
+        copy.accum = copy.accum.dup;
+        return copy;
+    }
+
+    // mark a position to slice from later on
+    size_t mark()
+    {
+        savedAccumIdx = accumIdx;
+        return accumIdx;
+    }
+
+    // slice to current position from previously marked position
+    auto slice() @property
+    {
+        // it's an open right range as usual
+        return CircularRange(accum, savedAccumIdx, accumIdx);
+    }
+
+    size_t index() const @property
+    {
+        return _index;
+    }
+
+private:
+    this(R src, size_t bufferSize)
+    {
+        range = src;
+        assert(bufferSize > 0);
+        assert((bufferSize & (bufferSize-1)) == 0); //is power of 2
+        accum = new ubyte[bufferSize];
+        if(range.empty)
+            _empty = true;
+        else
+            accum[accumIdx] = range.front; // load front
+    }
+
+    // a true RA-range of ubyte
+    struct CircularRange
+    {
+        this(ubyte[] buf, size_t s, size_t e)
+        {
+            assert((buffer.length & (buffer.length-1)) == 0);
+            buffer = buf;
+            start = s;
+            end = e;
+        }
+        //Forward range primitives
+        @property bool empty() const { return start == end; }
+        @property auto ref front() const { return buffer[start]; }
+        void popFront() { start = (start + 1) & mask; }
+        @property auto save() { return this; }
+
+        //Backwards is a bit slower, but should be rarely used (if at all)
+        @property ref back(){ return buffer[(end-1) & mask]; }
+        void popBack() { end = (end - 1) & mask; }
+
+        // RA range primitives
+        ref opIndex(size_t idx){ return buffer[(start+idx) & mask]; }
+        @property size_t length()
+        {
+            return end < start ? end + buffer.length -start : end - start;
+        }
+        alias length opDollar;
+
+        auto opSlice(size_t newStart, size_t newEnd)
+        {
+            size_t maskedStart = (start+newStart) & mask;
+            size_t maskedEnd = (start+newEnd) & mask;
+            return typeof(this)(buffer, maskedStart, maskedEnd);
+        }
+        // @@@bug fwd-ref in ldc0.10 (if placed above previous one)
+        auto opSlice(){ return opSlice(0, length); }
+    private:
+        @property auto mask(){ return buffer.length-1; }
+        size_t start, end;
+        ubyte[] buffer;
+    }
+
+    @property auto mask(){ return accum.length-1; }
+
+    R range;
+    bool _empty;
+    ubyte[] accum; // accumulator buffer for non-RA ranges
+    size_t savedAccumIdx;
+    size_t accumIdx; // current index in accumulator
+    size_t _index; // index of current element in original range
+}
+
+// TODO: make sure it's RandomAccess later
+/*static assert(isRandomAccessRange!(
+    LexSource!(typeof(filter!"true"(cast(ubyte[])null)))
+    .CircularRange)
+);*/
+
+//trivial pass-through for RA ranges
+private struct LexSource(R)
+    if(isRandomAccessRange!R)
+{
+    bool empty() const @property { return cur >= range.length; }
+    bool canPeek() const { return cur + 1 < range.length; }
+    auto ref front() const @property { return range[cur]; }
+    void popFront(){ cur++; }
+
+    auto ref peek() const
+    in
+    {
+        assert (canPeek());
+    }
+    body
+    {
+        return range[cur + 1];
+    }
+
+    auto save()
+    {
+        typeof(this) copy = this;
+        copy.range = range.save;
+        return copy;
+    }
+
+    auto mark()
+    {
+        saved = cur;
+    }
+
+    // use the underliying range slicing capability
+    auto slice() @property
+    {
+        return range[saved..cur];
+    }
+
+    size_t index() const @property
+    {
+        return cur;
+    }
+
+private:
+    this(R src)
+    {
+        range = src;
+    }
+    size_t cur, saved;
+    R range;
+}
+
+auto lexerSource(Range)(Range range, size_t bufSize=8)
+    if(isForwardRange!Range && !isRandomAccessRange!Range
+    && is(ElementType!Range : const(ubyte)))
+{
+    return LexSource!(Range)(range, bufSize);
+}
+
+auto lexerSource(Range)(Range range)
+    if(isRandomAccessRange!Range
+    && is(ElementType!Range : const(ubyte)))
+{
+    return LexSource!(Range)(range);
+}
+
+unittest
+{
+    // test the basic functionality of a "mark-slice" range
+    import std.string, std.stdio;
+
+    static void test_hello(T)(T lexs)
+    {
+        assert(lexs.front == 'H');
+        lexs.popFront();
+        assert(lexs.front == 'e');
+        foreach(i; 0..2)
+        {
+            auto saved = lexs.save;
+            lexs.mark();
+            assert(lexs.slice.equal(""));
+            lexs.popFront();
+            assert(lexs.slice.equal("e"), text(cast(char)lexs.front));
+            lexs.popFrontN(4);
+            auto bytes = lexs.slice.map!"cast(char)a".array();
+            assert(bytes.equal("ello,"), bytes.to!string);
+            lexs.mark();
+            assert(lexs.slice.equal(""));
+            assert(lexs.front == 'w');
+            lexs.popFrontN(6);
+            assert(lexs.empty);
+            auto s = lexs.slice();
+            auto msg = s.save.map!"cast(char)a".array;
+            assert(s[].equal("world!"), msg);
+            assert(s[2..$-1].equal("rld"), msg);
+            assert(s[0] == 'w' && s[$-1] == '!');
+            s.popFront();
+            assert(s.front == 'o' && s.back == '!');
+            s.popBack();
+            assert(s.front == 'o' && s.back == 'd');
+            //restore and repeat again
+            lexs = saved;
+        }
+    }
+
+    static void test_empty(T)(T lexs)
+    {
+        assert(lexs.empty);
+        lexs.mark();
+        assert(lexs.slice().equal(""));
+    }
+
+    auto fwdLex = lexerSource(
+        "Hello, world!"
+        .representation
+        .filter!"a != ' '", 16 // and the one that is more then enough
+    );
+    test_hello(fwdLex);
+    fwdLex = lexerSource(
+        "Hello, world!"
+        .representation
+        .filter!"a != ' '", 1 // try the smallest initial buffer
+    );
+    test_hello(fwdLex);
+    fwdLex = lexerSource("".representation.filter!"a != ' '");
+    auto raLex = lexerSource("".representation);
+    test_empty(raLex);
+    test_empty(fwdLex);
+    raLex = lexerSource("Hello,world!".representation);
+    test_hello(raLex);
+}
 
 // uses auto-detection for pure, safe nothrow
 bool isRangeEoF(R)(ref R range)
 {
     return range.empty || range.front == 0 || range.front == 0x1a;
 }
 
-/*
- * Slices of the above string to save memory. This array is automatically
- * generated.
- */
+// Lookup table for token values
 immutable(string[TokenType.max + 1]) tokenValues = [
     "=",
     "@",
@@ -2617,7 +2606,6 @@ immutable(string[TokenType.max + 1]) tokenValues = [
     "char",
     "creal",
     "dchar",
-    "delegate",
     "double",
     "float",
     "function",
@@ -2667,6 +2655,7 @@ immutable(string[TokenType.max + 1]) tokenValues = [
     "continue",
     "debug",
     "default",
+    "delegate",
     "delete",
     "do",
     "else",
@@ -2742,7 +2731,6 @@ immutable(string[TokenType.max + 1]) tokenValues = [
     null,
     null,
     null,
-    null,
 ];
 
 pure string getTokenValue(const TokenType type)
@@ -3396,5 +3384,4 @@ unittest
     assert (tokenCount == 16);
 }
 
-
 //void main(string[] args){}
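For reference, a minimal, self-contained sketch of the mark/slice pattern that the LexSource helpers in this commit are built around: wrap a random-access byte range, mark where a token starts, then slice from the mark to the current cursor. The MarkSliceSource name and the example below are illustrative only and are not part of the module.

import std.range : isRandomAccessRange, hasSlicing;
import std.string : representation;
import std.stdio : writeln;

// Illustrative-only sketch of the mark/slice idea (not the module's API).
struct MarkSliceSource(R) if (isRandomAccessRange!R && hasSlicing!R)
{
    R range;
    size_t cur, saved;

    @property bool empty() const { return cur >= range.length; }
    @property auto front() { return range[cur]; }
    void popFront() { ++cur; }

    void mark() { saved = cur; }                            // remember token start
    @property auto slice() { return range[saved .. cur]; }  // bytes since mark()
}

void main()
{
    auto src = MarkSliceSource!(immutable(ubyte)[])("int x;".representation);
    src.mark();
    while (!src.empty && src.front != ' ')
        src.popFront();
    writeln(cast(string) src.slice); // prints "int"
}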