whitespace
This commit is contained in:
parent
c1fcef1873
commit
dc81410008
178
std/d/lexer.d
178
std/d/lexer.d
|
@ -4,7 +4,7 @@
|
||||||
* This module contains a range-based _lexer for the D programming language.
|
* This module contains a range-based _lexer for the D programming language.
|
||||||
*
|
*
|
||||||
* For performance reasons the _lexer contained in this module operates only on
|
* For performance reasons the _lexer contained in this module operates only on
|
||||||
* ASCII and UTF-8 encoded source code. If the use of other encodings is
|
* ASCII or UTF-8 encoded source code. If the use of other encodings is
|
||||||
* desired, the source code must be converted to UTF-8 before passing it to this
|
* desired, the source code must be converted to UTF-8 before passing it to this
|
||||||
* _lexer.
|
* _lexer.
|
||||||
*
|
*
|
||||||
|
@ -125,8 +125,8 @@ version (unittest) import std.stdio;
|
||||||
public:
|
public:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a D token
|
* Represents a D token
|
||||||
*/
|
*/
|
||||||
struct Token
|
struct Token
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
|
@ -188,9 +188,9 @@ struct Token
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configure the behavior of the byToken() function. These flags may be
|
* Configure the behavior of the byToken() function. These flags may be
|
||||||
* combined using a bitwise or.
|
* combined using a bitwise or.
|
||||||
*/
|
*/
|
||||||
enum IterationStyle
|
enum IterationStyle
|
||||||
{
|
{
|
||||||
/// Only include code, not whitespace or comments
|
/// Only include code, not whitespace or comments
|
||||||
|
@ -208,9 +208,9 @@ enum IterationStyle
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configuration of the token lexing style. These flags may be combined with a
|
* Configuration of the token lexing style. These flags may be combined with a
|
||||||
* bitwise or.
|
* bitwise or.
|
||||||
*/
|
*/
|
||||||
enum TokenStyle : uint
|
enum TokenStyle : uint
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
|
@ -251,8 +251,8 @@ enum TokenStyle : uint
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lexer configuration
|
* Lexer configuration
|
||||||
*/
|
*/
|
||||||
struct LexerConfig
|
struct LexerConfig
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
|
@ -292,14 +292,14 @@ struct LexerConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Iterate over the given range of characters by D tokens.
|
* Iterate over the given range of characters by D tokens.
|
||||||
* Params:
|
* Params:
|
||||||
* range = the range of characters
|
* range = the range of characters
|
||||||
* config = the lexer configuration
|
* config = the lexer configuration
|
||||||
* bufferSize = initial size of internal circular buffer
|
* bufferSize = initial size of internal circular buffer
|
||||||
* Returns:
|
* Returns:
|
||||||
* an input range of tokens
|
* an input range of tokens
|
||||||
*/
|
*/
|
||||||
auto byToken(R)(R range, LexerConfig config, size_t bufferSize = 4*1024)
|
auto byToken(R)(R range, LexerConfig config, size_t bufferSize = 4*1024)
|
||||||
if (isForwardRange!(R) && !isRandomAccessRange!(R)
|
if (isForwardRange!(R) && !isRandomAccessRange!(R)
|
||||||
&& is(ElementType!R : const(ubyte)))
|
&& is(ElementType!R : const(ubyte)))
|
||||||
|
@ -326,8 +326,8 @@ auto byToken(R)(R range, LexerConfig config)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Range of tokens. Use byToken$(LPAREN)$(RPAREN) to instantiate.
|
* Range of tokens. Use byToken$(LPAREN)$(RPAREN) to instantiate.
|
||||||
*/
|
*/
|
||||||
struct TokenRange(LexSrc)
|
struct TokenRange(LexSrc)
|
||||||
//if ( is(LexSrc : LexSource!(U...), U...)) //check for LexSource
|
//if ( is(LexSrc : LexSource!(U...), U...)) //check for LexSource
|
||||||
{
|
{
|
||||||
|
@ -431,15 +431,15 @@ L_advance:
|
||||||
"=", "TokenType.assign",
|
"=", "TokenType.assign",
|
||||||
"@", "TokenType.at",
|
"@", "TokenType.at",
|
||||||
"&", "TokenType.bitAnd",
|
"&", "TokenType.bitAnd",
|
||||||
"&=", "TokenType.bitAndEquals",
|
"&=", "TokenType.bitAndEqual",
|
||||||
"|", "TokenType.bitOr",
|
"|", "TokenType.bitOr",
|
||||||
"|=", "TokenType.bitOrEquals",
|
"|=", "TokenType.bitOrEqual",
|
||||||
"~=", "TokenType.catEquals",
|
"~=", "TokenType.catEqual",
|
||||||
":", "TokenType.colon",
|
":", "TokenType.colon",
|
||||||
",", "TokenType.comma",
|
",", "TokenType.comma",
|
||||||
"--", "TokenType.decrement",
|
"--", "TokenType.decrement",
|
||||||
"$", "TokenType.dollar",
|
"$", "TokenType.dollar",
|
||||||
"==", "TokenType.equals",
|
"==", "TokenType.equal",
|
||||||
"=>", "TokenType.goesTo",
|
"=>", "TokenType.goesTo",
|
||||||
">", "TokenType.greater",
|
">", "TokenType.greater",
|
||||||
">=", "TokenType.greaterEqual",
|
">=", "TokenType.greaterEqual",
|
||||||
|
@ -454,21 +454,21 @@ L_advance:
|
||||||
"||", "TokenType.logicOr",
|
"||", "TokenType.logicOr",
|
||||||
"(", "TokenType.lParen",
|
"(", "TokenType.lParen",
|
||||||
"-", "TokenType.minus",
|
"-", "TokenType.minus",
|
||||||
"-=", "TokenType.minusEquals",
|
"-=", "TokenType.minusEqual",
|
||||||
"%", "TokenType.mod",
|
"%", "TokenType.mod",
|
||||||
"%=", "TokenType.modEquals",
|
"%=", "TokenType.modEqual",
|
||||||
"*=", "TokenType.mulEquals",
|
"*=", "TokenType.mulEqual",
|
||||||
"!", "TokenType.not",
|
"!", "TokenType.not",
|
||||||
"!=", "TokenType.notEquals",
|
"!=", "TokenType.notEqual",
|
||||||
"!>", "TokenType.notGreater",
|
"!>", "TokenType.notGreater",
|
||||||
"!>=", "TokenType.notGreaterEqual",
|
"!>=", "TokenType.notGreaterEqual",
|
||||||
"!<", "TokenType.notLess",
|
"!<", "TokenType.notLess",
|
||||||
"!<=", "TokenType.notLessEqual",
|
"!<=", "TokenType.notLessEqual",
|
||||||
"!<>", "TokenType.notLessEqualGreater",
|
"!<>", "TokenType.notLessEqualGreater",
|
||||||
"+", "TokenType.plus",
|
"+", "TokenType.plus",
|
||||||
"+=", "TokenType.plusEquals",
|
"+=", "TokenType.plusEqual",
|
||||||
"^^", "TokenType.pow",
|
"^^", "TokenType.pow",
|
||||||
"^^=", "TokenType.powEquals",
|
"^^=", "TokenType.powEqual",
|
||||||
"}", "TokenType.rBrace",
|
"}", "TokenType.rBrace",
|
||||||
"]", "TokenType.rBracket",
|
"]", "TokenType.rBracket",
|
||||||
")", "TokenType.rParen",
|
")", "TokenType.rParen",
|
||||||
|
@ -484,7 +484,7 @@ L_advance:
|
||||||
">>>", "TokenType.unsignedShiftRight",
|
">>>", "TokenType.unsignedShiftRight",
|
||||||
">>>=", "TokenType.unsignedShiftRightEqual",
|
">>>=", "TokenType.unsignedShiftRightEqual",
|
||||||
"^", "TokenType.xor",
|
"^", "TokenType.xor",
|
||||||
"^=", "TokenType.xorEquals",
|
"^=", "TokenType.xorEqual",
|
||||||
));
|
));
|
||||||
case '/':
|
case '/':
|
||||||
nextCharNonLF();
|
nextCharNonLF();
|
||||||
|
@ -505,7 +505,7 @@ L_advance:
|
||||||
goto L_advance; // tail-recursion
|
goto L_advance; // tail-recursion
|
||||||
|
|
||||||
case '=':
|
case '=':
|
||||||
current.type = TokenType.divEquals;
|
current.type = TokenType.divEqual;
|
||||||
current.value = "/=";
|
current.value = "/=";
|
||||||
src.popFront();
|
src.popFront();
|
||||||
return;
|
return;
|
||||||
|
@ -1877,186 +1877,186 @@ L_advance:
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns: true if the token is an operator
|
* Returns: true if the token is an operator
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isOperator(const TokenType t)
|
pure nothrow bool isOperator(const TokenType t)
|
||||||
{
|
{
|
||||||
return t >= TokenType.assign && t <= TokenType.xorEquals;
|
return t >= TokenType.assign && t <= TokenType.xorEqual;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ditto
|
* ditto
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isOperator(ref const Token t)
|
pure nothrow bool isOperator(ref const Token t)
|
||||||
{
|
{
|
||||||
return isOperator(t.type);
|
return isOperator(t.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns: true if the token is a keyword
|
* Returns: true if the token is a keyword
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isKeyword(const TokenType t)
|
pure nothrow bool isKeyword(const TokenType t)
|
||||||
{
|
{
|
||||||
return t >= TokenType.bool_ && t <= TokenType.with_;
|
return t >= TokenType.bool_ && t <= TokenType.with_;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ditto
|
* ditto
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isKeyword(ref const Token t)
|
pure nothrow bool isKeyword(ref const Token t)
|
||||||
{
|
{
|
||||||
return isKeyword(t.type);
|
return isKeyword(t.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns: true if the token is a built-in type
|
* Returns: true if the token is a built-in type
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isType(const TokenType t)
|
pure nothrow bool isType(const TokenType t)
|
||||||
{
|
{
|
||||||
return t >= TokenType.bool_ && t <= TokenType.wchar_;
|
return t >= TokenType.bool_ && t <= TokenType.wchar_;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ditto
|
* ditto
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isType(ref const Token t)
|
pure nothrow bool isType(ref const Token t)
|
||||||
{
|
{
|
||||||
return isType(t.type);
|
return isType(t.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns: true if the token is an attribute
|
* Returns: true if the token is an attribute
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isAttribute(const TokenType t)
|
pure nothrow bool isAttribute(const TokenType t)
|
||||||
{
|
{
|
||||||
return t >= TokenType.align_ && t <= TokenType.static_;
|
return t >= TokenType.align_ && t <= TokenType.static_;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ditto
|
* ditto
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isAttribute(ref const Token t)
|
pure nothrow bool isAttribute(ref const Token t)
|
||||||
{
|
{
|
||||||
return isAttribute(t.type);
|
return isAttribute(t.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns: true if the token is a protection attribute
|
* Returns: true if the token is a protection attribute
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isProtection(const TokenType t)
|
pure nothrow bool isProtection(const TokenType t)
|
||||||
{
|
{
|
||||||
return t >= TokenType.export_ && t <= TokenType.public_;
|
return t >= TokenType.export_ && t <= TokenType.public_;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ditto
|
* ditto
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isProtection(ref const Token t)
|
pure nothrow bool isProtection(ref const Token t)
|
||||||
{
|
{
|
||||||
return isProtection(t.type);
|
return isProtection(t.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns: true if the token is a compile-time constant such as ___DATE__
|
* Returns: true if the token is a compile-time constant such as ___DATE__
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isConstant(const TokenType t)
|
pure nothrow bool isConstant(const TokenType t)
|
||||||
{
|
{
|
||||||
return t >= TokenType.date && t <= TokenType.traits;
|
return t >= TokenType.date && t <= TokenType.traits;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ditto
|
* ditto
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isConstant(ref const Token t)
|
pure nothrow bool isConstant(ref const Token t)
|
||||||
{
|
{
|
||||||
return isConstant(t.type);
|
return isConstant(t.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns: true if the token is a string or number literal
|
* Returns: true if the token is a string or number literal
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isLiteral(const TokenType t)
|
pure nothrow bool isLiteral(const TokenType t)
|
||||||
{
|
{
|
||||||
return t >= TokenType.doubleLiteral && t <= TokenType.wstringLiteral;
|
return t >= TokenType.doubleLiteral && t <= TokenType.wstringLiteral;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ditto
|
* ditto
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isLiteral(ref const Token t)
|
pure nothrow bool isLiteral(ref const Token t)
|
||||||
{
|
{
|
||||||
return isLiteral(t.type);
|
return isLiteral(t.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns: true if the token is a number literal
|
* Returns: true if the token is a number literal
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isNumberLiteral(const TokenType t)
|
pure nothrow bool isNumberLiteral(const TokenType t)
|
||||||
{
|
{
|
||||||
return t >= TokenType.doubleLiteral && t <= TokenType.ulongLiteral;
|
return t >= TokenType.doubleLiteral && t <= TokenType.ulongLiteral;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ditto
|
* ditto
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isNumberLiteral(ref const Token t)
|
pure nothrow bool isNumberLiteral(ref const Token t)
|
||||||
{
|
{
|
||||||
return isNumberLiteral(t.type);
|
return isNumberLiteral(t.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns: true if the token is a string literal
|
* Returns: true if the token is a string literal
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isStringLiteral(const TokenType t)
|
pure nothrow bool isStringLiteral(const TokenType t)
|
||||||
{
|
{
|
||||||
return t >= TokenType.dstringLiteral && t <= TokenType.wstringLiteral;
|
return t >= TokenType.dstringLiteral && t <= TokenType.wstringLiteral;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ditto
|
* ditto
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isStringLiteral(ref const Token t)
|
pure nothrow bool isStringLiteral(ref const Token t)
|
||||||
{
|
{
|
||||||
return isStringLiteral(t.type);
|
return isStringLiteral(t.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns: true if the token is whitespace, a comment, a special token
|
* Returns: true if the token is whitespace, a comment, a special token
|
||||||
* sequence, or an identifier
|
* sequence, or an identifier
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isMisc(const TokenType t)
|
pure nothrow bool isMisc(const TokenType t)
|
||||||
{
|
{
|
||||||
return t >= TokenType.comment && t <= TokenType.specialTokenSequence;
|
return t >= TokenType.comment && t <= TokenType.specialTokenSequence;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ditto
|
* ditto
|
||||||
*/
|
*/
|
||||||
pure nothrow bool isMisc(ref const Token t)
|
pure nothrow bool isMisc(ref const Token t)
|
||||||
{
|
{
|
||||||
return isMisc(t.type);
|
return isMisc(t.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Listing of all the tokens in the D language.
|
* Listing of all the tokens in the D language.
|
||||||
*/
|
*/
|
||||||
enum TokenType: ushort
|
enum TokenType: ushort
|
||||||
{
|
{
|
||||||
assign, /// =
|
assign, /// =
|
||||||
at, /// @
|
at, /// @
|
||||||
bitAnd, /// &
|
bitAnd, /// &
|
||||||
bitAndEquals, /// &=
|
bitAndEqual, /// &=
|
||||||
bitOr, /// |
|
bitOr, /// |
|
||||||
bitOrEquals, /// |=
|
bitOrEqual, /// |=
|
||||||
catEquals, /// ~=
|
catEqual, /// ~=
|
||||||
colon, /// :
|
colon, /// :
|
||||||
comma, /// ,
|
comma, /// ,
|
||||||
decrement, /// --
|
decrement, /// --
|
||||||
div, /// /
|
div, /// /
|
||||||
divEquals, /// /=
|
divEqual, /// /=
|
||||||
dollar, /// $
|
dollar, /// $
|
||||||
dot, /// .
|
dot, /// .
|
||||||
equals, /// ==
|
equal, /// ==
|
||||||
goesTo, /// =>
|
goesTo, /// =>
|
||||||
greater, /// >
|
greater, /// >
|
||||||
greaterEqual, /// >=
|
greaterEqual, /// >=
|
||||||
|
@ -2072,21 +2072,21 @@ enum TokenType: ushort
|
||||||
logicOr, /// ||
|
logicOr, /// ||
|
||||||
lParen, /// $(LPAREN)
|
lParen, /// $(LPAREN)
|
||||||
minus, /// -
|
minus, /// -
|
||||||
minusEquals, /// -=
|
minusEqual, /// -=
|
||||||
mod, /// %
|
mod, /// %
|
||||||
modEquals, /// %=
|
modEqual, /// %=
|
||||||
mulEquals, /// *=
|
mulEqual, /// *=
|
||||||
not, /// !
|
not, /// !
|
||||||
notEquals, /// !=
|
notEqual, /// !=
|
||||||
notGreater, /// !>
|
notGreater, /// !>
|
||||||
notGreaterEqual, /// !>=
|
notGreaterEqual, /// !>=
|
||||||
notLess, /// !<
|
notLess, /// !<
|
||||||
notLessEqual, /// !<=
|
notLessEqual, /// !<=
|
||||||
notLessEqualGreater, /// !<>
|
notLessEqualGreater, /// !<>
|
||||||
plus, /// +
|
plus, /// +
|
||||||
plusEquals, /// +=
|
plusEqual, /// +=
|
||||||
pow, /// ^^
|
pow, /// ^^
|
||||||
powEquals, /// ^^=
|
powEqual, /// ^^=
|
||||||
rBrace, /// }
|
rBrace, /// }
|
||||||
rBracket, /// ]
|
rBracket, /// ]
|
||||||
rParen, /// $(RPAREN)
|
rParen, /// $(RPAREN)
|
||||||
|
@ -2104,7 +2104,7 @@ enum TokenType: ushort
|
||||||
unsignedShiftRightEqual, /// >>>=
|
unsignedShiftRightEqual, /// >>>=
|
||||||
vararg, /// ...
|
vararg, /// ...
|
||||||
xor, /// ^
|
xor, /// ^
|
||||||
xorEquals, /// ^=
|
xorEqual, /// ^=
|
||||||
|
|
||||||
bool_, /// $(D_KEYWORD bool)
|
bool_, /// $(D_KEYWORD bool)
|
||||||
byte_, /// $(D_KEYWORD byte)
|
byte_, /// $(D_KEYWORD byte)
|
||||||
|
|
Loading…
Reference in New Issue