parser fixes

This commit is contained in:
Hackerpilot 2013-01-27 14:00:14 -08:00
parent c7b84ca0cc
commit 31f59384b9
7 changed files with 449 additions and 459 deletions

View File

@ -1,2 +1,2 @@
dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline #dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest

View File

@ -14,28 +14,28 @@ class CircularBuffer(T) : InputRange!(T)
{ {
public: public:
this (size_t size, InputRange!(T) range) this (size_t size, InputRange!(T) range)
{ {
this.range = range; this.range = range;
this.margin = size; this.margin = size;
data = new T[(margin * 2) + 1]; data = new T[(margin * 2) + 1];
if (range.empty()) if (range.empty())
{ {
_empty = true; _empty = true;
return; return;
} }
for (size_t i = 0; i <= margin && !this.range.empty(); ++i) for (size_t i = 0; i <= margin && !this.range.empty(); ++i)
{ {
data[i] = this.range.front(); data[i] = this.range.front();
this.range.popFront(); this.range.popFront();
end++; end++;
} }
} }
override T front() const @property override T front() @property
{ {
return data[index]; return data[index];
} }
T peek(int offset = 1) T peek(int offset = 1)
in in
@ -52,32 +52,32 @@ public:
return abs(offset) <= margin && sourceIndex + offset >= 0; return abs(offset) <= margin && sourceIndex + offset >= 0;
} }
override void popFront() override void popFront()
in in
{ {
assert (!_empty); assert (!_empty);
} }
body body
{ {
index = (index + 1) % data.length; index = (index + 1) % data.length;
++sourceIndex; ++sourceIndex;
if (range.empty()) if (range.empty())
{ {
if (index == end) if (index == end)
_empty = true; _empty = true;
} }
else else
{ {
data[end] = range.front(); data[end] = range.front();
end = (end + 1) % data.length; end = (end + 1) % data.length;
range.popFront(); range.popFront();
} }
} }
bool empty() const @property bool empty() const @property
{ {
return _empty; return _empty;
} }
override T moveFront() override T moveFront()
{ {
@ -94,6 +94,7 @@ public:
result = dg(front); result = dg(front);
if (result) if (result)
break; break;
popFront();
} }
return result; return result;
} }
@ -107,18 +108,19 @@ public:
result = dg(i, front); result = dg(i, front);
if (result) if (result)
break; break;
popFront();
} }
return result; return result;
} }
private: private:
InputRange!(T) range; InputRange!(T) range;
immutable size_t margin; immutable size_t margin;
T[] data; T[] data;
size_t sourceIndex; size_t sourceIndex;
size_t end; size_t end;
size_t index; size_t index;
bool _empty; bool _empty;
} }
unittest unittest
@ -131,10 +133,10 @@ unittest
// Verifies that iterating a CircularBuffer yields every element of the
// underlying range, in order, and that the backing store is sized
// (margin * 2) + 1.
unittest
{
	// CircularBuffer is a class template with a single type parameter whose
	// constructor takes an InputRange!(T) interface, so the array has to be
	// wrapped in a range object and the buffer allocated with `new`.
	import std.range : inputRangeObject;

	int[] arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
	auto buf = new CircularBuffer!(int)(2, inputRangeObject(arr));
	// margin == 2, so the buffer holds (2 * 2) + 1 slots
	assert (buf.data.length == 5);
	auto iterated = array(buf);
	assert (iterated == arr);
}

View File

@ -10,8 +10,6 @@ import std.stdio;
import std.array; import std.array;
import std.d.lexer; import std.d.lexer;
import langutils;
void writeSpan(string cssClass, string value) void writeSpan(string cssClass, string value)
{ {
stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`); stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);

View File

@ -6,6 +6,7 @@
module langutils; module langutils;
import std.array; import std.array;
import std.algorithm;
import std.d.lexer; import std.d.lexer;
@ -43,7 +44,7 @@ string combineTokens(ref const Token[] tokens)
return app.data; return app.data;
} }
pure string getTypeFromToken(const Token t) pure nothrow string getTypeFromToken(const Token t)
{ {
switch (t.type) switch (t.type)
{ {
@ -73,8 +74,14 @@ pure string getTypeFromToken(const Token t)
} }
} }
/**
 * Checks whether a token is an identifier or one of the built-in type
 * keywords.
 *
 * Params:
 *     t = the token to examine
 * Returns: true if the token's type is TokenType.Identifier or lies strictly
 *     between TokenType.TYPES_BEGIN and TokenType.TYPES_END
 */
pure bool isIdentifierOrType(const Token t)
{
	// Both bounds must be compared against t.type. A bare TokenType.TYPES_END
	// operand is a non-zero enum value and therefore always true, which would
	// accept every token type above TYPES_BEGIN.
	return t.type == TokenType.Identifier
		|| (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END);
}
/**
 * Checks whether a token's text begins with one of the documentation
 * comment openers: three slashes, slash-star-star, or slash-plus-plus.
 *
 * Params:
 *     t = the token whose value is examined
 * Returns: true if the value starts with any of the three openers
 */
pure bool isDocComment(ref const Token t)
{
	if (t.value.startsWith("///"))
		return true;
	if (t.value.startsWith("/**"))
		return true;
	return t.value.startsWith("/++");
}

47
main.d
View File

@ -15,6 +15,7 @@ import std.parallelism;
import std.path; import std.path;
import std.regex; import std.regex;
import std.stdio; import std.stdio;
import std.range;
import std.d.lexer; import std.d.lexer;
import autocomplete; import autocomplete;
@ -28,7 +29,7 @@ import circularbuffer;
immutable size_t CIRC_BUFF_SIZE = 4; immutable size_t CIRC_BUFF_SIZE = 4;
pure bool isLineOfCode(TokenType t) pure nothrow bool isLineOfCode(TokenType t)
{ {
switch(t) switch(t)
{ {
@ -138,36 +139,21 @@ int main(string[] args)
{ {
if (args.length == 1) if (args.length == 1)
{ {
auto f = appender!string(); writeln(stdin.byLine(KeepTerminator.yes).join().byToken().count!(a => isLineOfCode(a.type))());
char[] buf;
while (stdin.readln(buf))
f.put(buf);
writeln(f.data.byToken().count!(a => isLineOfCode(a.type))());
} }
else else
{ {
writeln(args[1..$].map!(a => a.readText().byToken())().joiner() writeln(args[1..$].map!(a => File(a).byLine(KeepTerminator.yes).join().byToken())()
.count!(a => isLineOfCode(a.type))()); .joiner().count!(a => isLineOfCode(a.type))());
} }
return 0; return 0;
} }
if (highlight) if (highlight)
{ {
if (args.length == 1) File f = args.length == 1 ? stdin : File(args[1]);
{ highlighter.highlight(f.byLine(KeepTerminator.yes).join().byToken(
auto f = appender!string(); IterationStyle.Everything, StringStyle.Source));
char[] buf;
while (stdin.readln(buf))
f.put(buf);
highlighter.highlight(f.data.byToken(IterationStyle.Everything,
StringStyle.Source));
}
else
{
highlighter.highlight(args[1].readText().byToken(
IterationStyle.Everything, StringStyle.Source));
}
return 0; return 0;
} }
@ -213,20 +199,9 @@ int main(string[] args)
if (json) if (json)
{ {
CircularBuffer!(Token) tokens; CircularBuffer!(Token) tokens;
if (args.length == 1) File f = args.length == 1 ? stdin : File(args[1]);
{ tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE,
// Read from stdin f.byLine(KeepTerminator.yes).join().byToken!(char[])());
auto f = appender!string();
char[] buf;
while (stdin.readln(buf))
f.put(buf);
tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(f.data));
}
else
{
// read given file
tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(readText(args[1])));
}
auto mod = parseModule(tokens); auto mod = parseModule(tokens);
mod.writeJSONTo(stdout); mod.writeJSONTo(stdout);
return 0; return 0;

View File

@ -26,6 +26,7 @@ public:
this(InputRange!Token tokens, TokenType open, TokenType close) this(InputRange!Token tokens, TokenType open, TokenType close)
{ {
super(0, tokens); super(0, tokens);
this.range = tokens;
this.open = open; this.open = open;
this.close = close; this.close = close;
} }
@ -35,7 +36,7 @@ public:
return _empty; return _empty;
} }
override Token front() const @property override Token front() @property
{ {
return range.front; return range.front;
} }
@ -47,14 +48,20 @@ public:
++depth; ++depth;
else if (range.front == close) else if (range.front == close)
--depth; --depth;
_empty = depth == 0; _empty = depth == 0 || range.empty;
} }
invariant()
{
assert (range);
assert (depth >= 0);
}
private: private:
int depth; int depth;
TokenType open; TokenType open;
TokenType close; TokenType close;
TokenBuffer range; InputRange!(Token) range;
bool _empty; bool _empty;
} }

View File

@ -1,71 +1,71 @@
// Written in the D programming language // Written in the D programming language
/** /**
* This module contains a range-based lexer for the D programming language. * This module contains a range-based lexer for the D programming language.
* *
* Examples: * Examples:
* *
* Generate HTML markup of D code. * Generate HTML markup of D code.
* --- * ---
* import std.stdio; * import std.stdio;
* import std.array; * import std.array;
* import std.file; * import std.file;
* import std.d.lexer; * import std.d.lexer;
* *
* void writeSpan(string cssClass, string value) * void writeSpan(string cssClass, string value)
* { * {
* stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`); * stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
* } * }
* *
* void highlight(R)(R tokens) * void highlight(R)(R tokens)
* { * {
* stdout.writeln(q"[<!DOCTYPE html> * stdout.writeln(q"[<!DOCTYPE html>
* <html> * <html>
* <head> * <head>
* <meta http-equiv="content-type" content="text/html; charset=UTF-8"/> * <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
* <body> * <body>
* <style type="text/css"> * <style type="text/css">
* html { background-color: #fff; color: #222; } * html { background-color: #fff; color: #222; }
* .kwrd { font-weight: bold; color: blue; } * .kwrd { font-weight: bold; color: blue; }
* .com { color: green; font-style: italic;} * .com { color: green; font-style: italic;}
* .num { color: orangered; font-weight: bold; } * .num { color: orangered; font-weight: bold; }
* .str { color: red; font-style: italic; } * .str { color: red; font-style: italic; }
* .op { color: #333; font-weight: bold; } * .op { color: #333; font-weight: bold; }
* .type { color: magenta; font-weight: bold; } * .type { color: magenta; font-weight: bold; }
* </style> * </style>
* <pre>]"); * <pre>]");
* *
* foreach (Token t; tokens) * foreach (Token t; tokens)
* { * {
* if (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END) * if (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END)
* writeSpan("type", t.value); * writeSpan("type", t.value);
* else if (t.type > TokenType.KEYWORDS_BEGIN && t.type < TokenType.KEYWORDS_END) * else if (t.type > TokenType.KEYWORDS_BEGIN && t.type < TokenType.KEYWORDS_END)
* writeSpan("kwrd", t.value); * writeSpan("kwrd", t.value);
* else if (t.type == TokenType.Comment) * else if (t.type == TokenType.Comment)
* writeSpan("com", t.value); * writeSpan("com", t.value);
* else if (t.type > TokenType.STRINGS_BEGIN && t.type < TokenType.STRINGS_END) * else if (t.type > TokenType.STRINGS_BEGIN && t.type < TokenType.STRINGS_END)
* writeSpan("str", t.value); * writeSpan("str", t.value);
* else if (t.type > TokenType.NUMBERS_BEGIN && t.type < TokenType.NUMBERS_END) * else if (t.type > TokenType.NUMBERS_BEGIN && t.type < TokenType.NUMBERS_END)
* writeSpan("num", t.value); * writeSpan("num", t.value);
* else if (t.type > TokenType.OPERATORS_BEGIN && t.type < TokenType.OPERATORS_END) * else if (t.type > TokenType.OPERATORS_BEGIN && t.type < TokenType.OPERATORS_END)
* writeSpan("op", t.value); * writeSpan("op", t.value);
* else * else
* stdout.write(t.value.replace("<", "&lt;")); * stdout.write(t.value.replace("<", "&lt;"));
* } * }
* stdout.writeln("</pre>\n</body></html>"); * stdout.writeln("</pre>\n</body></html>");
* } * }
* *
* void main(string[] args) * void main(string[] args)
* { * {
* args[1].readText().byToken(IterationStyle.Everything, StringStyle.Source).highlight(); * args[1].readText().byToken(IterationStyle.Everything, StringStyle.Source).highlight();
* } * }
* --- * ---
* *
* Copyright: Brian Schott 2013 * Copyright: Brian Schott 2013
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0) * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
* Authors: Brian Schott * Authors: Brian Schott
* Source: $(PHOBOSSRC std/d/_lexer.d) * Source: $(PHOBOSSRC std/d/_lexer.d)
*/ */
module std.d.lexer; module std.d.lexer;
@ -81,8 +81,8 @@ import std.d.entities;
public: public:
/** /**
* Represents a D token * Represents a D token
*/ */
struct Token struct Token
{ {
/// The token type. /// The token type.
@ -98,28 +98,28 @@ struct Token
uint startIndex; uint startIndex;
/** /**
* Check to see if the token is of the same type and has the same string * Check to see if the token is of the same type and has the same string
* representation as the given token. * representation as the given token.
*/ */
bool opEquals(ref const(Token) other) const bool opEquals(ref const(Token) other) const
{ {
return other.type == type && other.value == value; return other.type == type && other.value == value;
} }
/** /**
* Checks to see if the token's string representation is equal to the given * Checks to see if the token's string representation is equal to the given
* string. * string.
*/ */
bool opEquals(string value) const { return this.value == value; } bool opEquals(string value) const { return this.value == value; }
/**
 * Checks to see if the token is of the given type.
 *
 * Params:
 *     type = the token type to compare against
 * Returns: true if this token's type equals the given type
 */
bool opEquals(TokenType type) const
{
	// The parameter shadows the member of the same name, so the member must
	// be qualified with `this`; an unqualified `type == type` compares the
	// argument with itself and is always true.
	return this.type == type;
}
/** /**
* Comparison operator orders tokens by start index. * Comparison operator orders tokens by start index.
*/ */
int opCmp(size_t i) const int opCmp(size_t i) const
{ {
if (startIndex < i) return -1; if (startIndex < i) return -1;
@ -129,8 +129,8 @@ struct Token
} }
/** /**
* Configure the behavior of the byToken() function * Configure the behavior of the byToken() function
*/ */
enum IterationStyle enum IterationStyle
{ {
/// Only include code, not whitespace or comments /// Only include code, not whitespace or comments
@ -139,58 +139,59 @@ enum IterationStyle
IncludeComments = 0b0001, IncludeComments = 0b0001,
/// Includes whitespace /// Includes whitespace
IncludeWhitespace = 0b0010, IncludeWhitespace = 0b0010,
/// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens) /// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens)
IncludeSpecialTokens = 0b0100, IncludeSpecialTokens = 0b0100,
/// Include everything /// Include everything
Everything = IncludeComments | IncludeWhitespace Everything = IncludeComments | IncludeWhitespace
} }
/** /**
* Configuration of the string lexing style * Configuration of the string lexing style
*/ */
enum StringStyle : uint enum StringStyle : uint
{ {
/** /**
* Escape sequences will be replaced with their equivalent characters, * Escape sequences will be replaced with their equivalent characters,
* enclosing quote characters will not be included. Useful for creating a * enclosing quote characters will not be included. Useful for creating a
* compiler or interpreter. * compiler or interpreter.
*/ */
Default = 0b0000, Default = 0b0000,
/** /**
* Escape sequences will not be processed. An escaped quote character will * Escape sequences will not be processed. An escaped quote character will
* not terminate string lexing, but it will not be replaced with the quote * not terminate string lexing, but it will not be replaced with the quote
* character in the token. * character in the token.
*/ */
NotEscaped = 0b0001, NotEscaped = 0b0001,
/** /**
* Strings will include their opening and closing quote characters as well * Strings will include their opening and closing quote characters as well
* as any prefixes or suffixes $(LPAREN)e.g.: $(D_STRING "abcde"w) will * as any prefixes or suffixes $(LPAREN)e.g.: $(D_STRING "abcde"w) will
* include the $(D_STRING 'w') character as well as the opening and closing * include the $(D_STRING 'w') character as well as the opening and closing
* quotes$(RPAREN) * quotes$(RPAREN)
*/ */
IncludeQuotes = 0x0010, IncludeQuotes = 0x0010,
/** /**
* Strings will be read exactly as they appeared in the source, including * Strings will be read exactly as they appeared in the source, including
* their opening and closing quote characters. Useful for syntax * their opening and closing quote characters. Useful for syntax
* highlighting. * highlighting.
*/ */
Source = NotEscaped | IncludeQuotes, Source = NotEscaped | IncludeQuotes,
} }
/** /**
* Iterate over the given range of characters by D tokens. * Iterate over the given range of characters by D tokens.
* Params: * Params:
* range = the range of characters * range = the range of characters
* iterationStyle = See IterationStyle * iterationStyle = See IterationStyle
* stringStyle = see StringStyle * stringStyle = see StringStyle
* Returns: * Returns:
* an input range of tokens * an input range of tokens
*/ */
TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly, TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly,
const StringStyle stringStyle = StringStyle.Default) if (isForwardRange!(R) && isSomeChar!(ElementType!(R))) const StringStyle stringStyle = StringStyle.Default)
if (isForwardRange!(R) && (isSomeChar!(ElementType!(R)) || is (ElementType!(R) == ubyte)))
{ {
auto r = new TokenRange!(R)(range); auto r = new TokenRange!(R)(range);
r.stringStyle = stringStyle; r.stringStyle = stringStyle;
@ -201,9 +202,9 @@ TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = Iterati
} }
/** /**
* Range of tokens. Avoid creating instances of this manually. Use * Range of tokens. Avoid creating instances of this manually. Use
* $(DDOC_PSYMBOL byToken$(LPAREN)$(RPAREN)) instead, as it does some initialization work. * $(DDOC_PSYMBOL byToken$(LPAREN)$(RPAREN)) instead, as it does some initialization work.
*/ */
class TokenRange(R) : InputRange!(Token) class TokenRange(R) : InputRange!(Token)
{ {
this(ref R range) this(ref R range)
@ -212,16 +213,16 @@ class TokenRange(R) : InputRange!(Token)
} }
/** /**
* Returns: true if the range is empty * Returns: true if the range is empty
*/ */
override bool empty() const @property override bool empty() const @property
{ {
return _empty; return _empty;
} }
/** /**
* Returns: the current token * Returns: the current token
*/ */
override Token front() const @property override Token front() const @property
{ {
enforce(!_empty, "Cannot call front() on empty token range"); enforce(!_empty, "Cannot call front() on empty token range");
@ -229,8 +230,8 @@ class TokenRange(R) : InputRange!(Token)
} }
/** /**
* Returns the current token and then removes it from the range * Returns the current token and then removes it from the range
*/ */
override Token moveFront() override Token moveFront()
{ {
auto r = front(); auto r = front();
@ -265,38 +266,38 @@ class TokenRange(R) : InputRange!(Token)
return result; return result;
} }
override void popFront() override void popFront()
{ {
// Filter out tokens we don't care about // Filter out tokens we don't care about
loop: do loop: do
{ {
advance(); advance();
switch (current.type) switch (current.type)
{ {
case TokenType.Comment: case TokenType.Comment:
if (iterStyle & IterationStyle.IncludeComments) if (iterStyle & IterationStyle.IncludeComments)
break loop; break loop;
break; break;
case TokenType.Whitespace: case TokenType.Whitespace:
if (iterStyle & IterationStyle.IncludeWhitespace) if (iterStyle & IterationStyle.IncludeWhitespace)
break loop; break loop;
break; break;
case TokenType.SpecialTokenSequence: case TokenType.SpecialTokenSequence:
if (iterStyle & IterationStyle.IncludeSpecialTokens) if (iterStyle & IterationStyle.IncludeSpecialTokens)
break loop; break loop;
break; break;
default: default:
break loop; break loop;
} }
} }
while (!empty()); while (!empty());
} }
private: private:
/* /*
* Advances the range to the next token * Advances the range to the next token
*/ */
void advance() void advance()
{ {
if (range.empty) if (range.empty)
@ -311,8 +312,8 @@ private:
if (std.uni.isWhite(range.front)) if (std.uni.isWhite(range.front))
{ {
current = lexWhitespace(range, index, lineNumber); current = lexWhitespace(range, index, lineNumber);
return; return;
} }
outer: switch (range.front) outer: switch (range.front)
{ {
@ -456,22 +457,22 @@ private:
} }
else else
goto default; goto default;
case '#': case '#':
string special = lexSpecialTokenSequence(range, index, lineNumber); string special = lexSpecialTokenSequence(range, index, lineNumber);
if (special) if (special)
{ {
current.type = TokenType.SpecialTokenSequence; current.type = TokenType.SpecialTokenSequence;
current.value = special; current.value = special;
} }
else else
{ {
current.type = TokenType.Hash; current.type = TokenType.Hash;
current.value = "#"; current.value = "#";
range.popFront(); range.popFront();
++index; ++index;
break; break;
} }
break; break;
default: default:
auto app = appender!(ElementType!(R)[])(); auto app = appender!(ElementType!(R)[])();
while(!range.isEoF() && !isSeparating(range.front)) while(!range.isEoF() && !isSeparating(range.front))
@ -497,38 +498,38 @@ private:
unittest unittest
{ {
import std.stdio; import std.stdio;
auto a = "/**comment*/\n#lin #line 10 \"test.d\"\nint a;//test\n"; auto a = "/**comment*/\n#lin #line 10 \"test.d\"\nint a;//test\n";
foreach (t; byToken(a)) foreach (t; byToken(a))
writeln(t); writeln(t);
} }
/** /**
* Listing of all the tokens in the D language. * Listing of all the tokens in the D language.
* *
* Token types are arranged so that it is easy to group tokens while iterating * Token types are arranged so that it is easy to group tokens while iterating
* over them. For example: * over them. For example:
* --- * ---
* assert(TokenType.Increment < TokenType.OPERATORS_END); * assert(TokenType.Increment < TokenType.OPERATORS_END);
* assert(TokenType.Increment > TokenType.OPERATORS_BEGIN); * assert(TokenType.Increment > TokenType.OPERATORS_BEGIN);
* --- * ---
* The non-token values are documented below: * The non-token values are documented below:
* *
* $(BOOKTABLE , * $(BOOKTABLE ,
* $(TR $(TH Begin) $(TH End) $(TH Content) $(TH Examples)) * $(TR $(TH Begin) $(TH End) $(TH Content) $(TH Examples))
* $(TR $(TD OPERATORS_BEGIN) $(TD OPERATORS_END) $(TD operators) $(TD +, -, <<=)) * $(TR $(TD OPERATORS_BEGIN) $(TD OPERATORS_END) $(TD operators) $(TD +, -, <<=))
* $(TR $(TD TYPES_BEGIN) $(TD TYPES_END) $(TD types) $(TD bool, char, double)) * $(TR $(TD TYPES_BEGIN) $(TD TYPES_END) $(TD types) $(TD bool, char, double))
* $(TR $(TD KEYWORDS_BEGIN) $(TD KEYWORDS_END) $(TD keywords) $(TD class, if, assert)) * $(TR $(TD KEYWORDS_BEGIN) $(TD KEYWORDS_END) $(TD keywords) $(TD class, if, assert))
* $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD attributes) $(TD override synchronized, __gshared)) * $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD attributes) $(TD override synchronized, __gshared))
* $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD protection) $(TD public, protected)) * $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD protection) $(TD public, protected))
* $(TR $(TD CONSTANTS_BEGIN) $(TD CONSTANTS_END) $(TD compile-time constants) $(TD __FILE__, __TIME__)) * $(TR $(TD CONSTANTS_BEGIN) $(TD CONSTANTS_END) $(TD compile-time constants) $(TD __FILE__, __TIME__))
* $(TR $(TD LITERALS_BEGIN) $(TD LITERALS_END) $(TD string and numeric literals) $(TD "str", 123)) * $(TR $(TD LITERALS_BEGIN) $(TD LITERALS_END) $(TD string and numeric literals) $(TD "str", 123))
* $(TR $(TD NUMBERS_BEGIN) $(TD NUMBERS_END) $(TD numeric literals) $(TD 0x123p+9, 0b0110)) * $(TR $(TD NUMBERS_BEGIN) $(TD NUMBERS_END) $(TD numeric literals) $(TD 0x123p+9, 0b0110))
* $(TR $(TD STRINGS_BEGIN) $(TD STRINGS_END) $(TD string literals) $(TD `123`c, q{tokens;}, "abcde")) * $(TR $(TD STRINGS_BEGIN) $(TD STRINGS_END) $(TD string literals) $(TD `123`c, q{tokens;}, "abcde"))
* $(TR $(TD MISC_BEGIN) $(TD MISC_END) $(TD anything else) $(TD whitespace, comments, identifiers)) * $(TR $(TD MISC_BEGIN) $(TD MISC_END) $(TD anything else) $(TD whitespace, comments, identifiers))
* ) * )
* Note that several of the above ranges overlap. * Note that several of the above ranges overlap.
*/ */
enum TokenType: uint enum TokenType: uint
{ {
// Operators // Operators
@ -599,38 +600,38 @@ enum TokenType: uint
OPERATORS_END, /// OPERATORS_END, ///
// Keywords // Keywords
KEYWORDS_BEGIN, /// KEYWORDS_BEGIN, ///
TYPES_BEGIN, /// TYPES_BEGIN, ///
Bool, /// $(D_KEYWORD bool) Bool, /// $(D_KEYWORD bool)
Byte, /// $(D_KEYWORD byte) Byte, /// $(D_KEYWORD byte)
Cdouble, /// $(D_KEYWORD cdouble) Cdouble, /// $(D_KEYWORD cdouble)
Cent, /// $(D_KEYWORD cent) Cent, /// $(D_KEYWORD cent)
Cfloat, /// $(D_KEYWORD cfloat) Cfloat, /// $(D_KEYWORD cfloat)
Char, /// $(D_KEYWORD char) Char, /// $(D_KEYWORD char)
Creal, /// $(D_KEYWORD creal) Creal, /// $(D_KEYWORD creal)
Dchar, /// $(D_KEYWORD dchar) Dchar, /// $(D_KEYWORD dchar)
Double, /// $(D_KEYWORD double) Double, /// $(D_KEYWORD double)
DString, /// $(D_KEYWORD dstring) DString, /// $(D_KEYWORD dstring)
Float, /// $(D_KEYWORD float) Float, /// $(D_KEYWORD float)
Function, /// $(D_KEYWORD function) Function, /// $(D_KEYWORD function)
Idouble, /// $(D_KEYWORD idouble) Idouble, /// $(D_KEYWORD idouble)
Ifloat, /// $(D_KEYWORD ifloat) Ifloat, /// $(D_KEYWORD ifloat)
Int, /// $(D_KEYWORD int) Int, /// $(D_KEYWORD int)
Ireal, /// $(D_KEYWORD ireal) Ireal, /// $(D_KEYWORD ireal)
Long, /// $(D_KEYWORD long) Long, /// $(D_KEYWORD long)
Real, /// $(D_KEYWORD real) Real, /// $(D_KEYWORD real)
Short, /// $(D_KEYWORD short) Short, /// $(D_KEYWORD short)
String, /// $(D_KEYWORD string) String, /// $(D_KEYWORD string)
Ubyte, /// $(D_KEYWORD ubyte) Ubyte, /// $(D_KEYWORD ubyte)
Ucent, /// $(D_KEYWORD ucent) Ucent, /// $(D_KEYWORD ucent)
Uint, /// $(D_KEYWORD uint) Uint, /// $(D_KEYWORD uint)
Ulong, /// $(D_KEYWORD ulong) Ulong, /// $(D_KEYWORD ulong)
Ushort, /// $(D_KEYWORD ushort) Ushort, /// $(D_KEYWORD ushort)
Void, /// $(D_KEYWORD void) Void, /// $(D_KEYWORD void)
Wchar, /// $(D_KEYWORD wchar) Wchar, /// $(D_KEYWORD wchar)
WString, /// $(D_KEYWORD wstring) WString, /// $(D_KEYWORD wstring)
TYPES_END, /// TYPES_END, ///
ATTRIBUTES_BEGIN, /// ATTRIBUTES_BEGIN, ///
Align, /// $(D_KEYWORD align) Align, /// $(D_KEYWORD align)
Deprecated, /// $(D_KEYWORD deprecated) Deprecated, /// $(D_KEYWORD deprecated)
@ -699,7 +700,7 @@ enum TokenType: uint
Struct, /// $(D_KEYWORD struct) Struct, /// $(D_KEYWORD struct)
Super, /// $(D_KEYWORD super) Super, /// $(D_KEYWORD super)
Switch, /// $(D_KEYWORD switch) Switch, /// $(D_KEYWORD switch)
Template, /// $(D_KEYWORD template) Template, /// $(D_KEYWORD template)
This, /// $(D_KEYWORD this) This, /// $(D_KEYWORD this)
Throw, /// $(D_KEYWORD throw) Throw, /// $(D_KEYWORD throw)
True, /// $(D_KEYWORD true) True, /// $(D_KEYWORD true)
@ -729,7 +730,7 @@ enum TokenType: uint
Identifier, /// anything else Identifier, /// anything else
ScriptLine, // Line at the beginning of source file that starts from #! ScriptLine, // Line at the beginning of source file that starts from #!
Whitespace, /// whitespace Whitespace, /// whitespace
SpecialTokenSequence, /// #line 10 "file.d" SpecialTokenSequence, /// #line 10 "file.d"
MISC_END, /// MISC_END, ///
// Literals // Literals
@ -1122,53 +1123,53 @@ Token lexHexString(R, C = ElementType!R)(ref R input, ref uint index, ref uint l
const StringStyle style = StringStyle.Default) const StringStyle style = StringStyle.Default)
in in
{ {
assert (input.front == 'x'); assert (input.front == 'x');
} }
body body
{ {
Token t; Token t;
t.lineNumber = lineNumber; t.lineNumber = lineNumber;
t.startIndex = index; t.startIndex = index;
t.type = TokenType.StringLiteral; t.type = TokenType.StringLiteral;
auto app = appender!(C[])(); auto app = appender!(C[])();
if (style & StringStyle.IncludeQuotes) if (style & StringStyle.IncludeQuotes)
app.put("x\""); app.put("x\"");
input.popFront(); input.popFront();
input.popFront(); input.popFront();
index += 2; index += 2;
while (!input.isEoF()) while (!input.isEoF())
{ {
if (isNewline(input)) if (isNewline(input))
{ {
app.put(popNewline(input, index)); app.put(popNewline(input, index));
++lineNumber; ++lineNumber;
} }
else if (isHexDigit(input.front)) else if (isHexDigit(input.front))
{ {
app.put(input.front); app.put(input.front);
input.popFront(); input.popFront();
++index; ++index;
} }
else if (std.uni.isWhite(input.front) && (style & StringStyle.NotEscaped)) else if (std.uni.isWhite(input.front) && (style & StringStyle.NotEscaped))
{ {
app.put(input.front); app.put(input.front);
input.popFront(); input.popFront();
++index; ++index;
} }
else if (input.front == '"') else if (input.front == '"')
{ {
if (style & StringStyle.IncludeQuotes) if (style & StringStyle.IncludeQuotes)
app.put('"'); app.put('"');
input.popFront(); input.popFront();
++index; ++index;
break; break;
} }
else else
{ {
// This is an error // This is an error
} }
} }
if (!input.isEoF()) if (!input.isEoF())
{ {
switch (input.front) switch (input.front)
{ {
@ -1188,43 +1189,43 @@ body
break; break;
} }
} }
if (style & StringStyle.NotEscaped) if (style & StringStyle.NotEscaped)
t.value = to!string(app.data); t.value = to!string(app.data);
else else
{ {
auto a = appender!(char[])(); auto a = appender!(char[])();
foreach (b; std.range.chunks(app.data, 2)) foreach (b; std.range.chunks(app.data, 2))
a.put(to!string(cast(dchar) parse!uint(b, 16))); a.put(to!string(cast(dchar) parse!uint(b, 16)));
t.value = to!string(a.data); t.value = to!string(a.data);
} }
return t; return t;
} }
unittest unittest
{ {
uint i; uint i;
uint l; uint l;
auto a = `x"204041"`; auto a = `x"204041"`;
auto ar = lexHexString(a, i, l); auto ar = lexHexString(a, i, l);
assert (ar == " @A"); assert (ar == " @A");
assert (ar == TokenType.StringLiteral); assert (ar == TokenType.StringLiteral);
auto b = `x"20"w`; auto b = `x"20"w`;
auto br = lexHexString(b, i, l); auto br = lexHexString(b, i, l);
assert (br == " "); assert (br == " ");
assert (br == TokenType.WStringLiteral); assert (br == TokenType.WStringLiteral);
auto c = `x"6d"`; auto c = `x"6d"`;
auto cr = lexHexString(c, i, l, StringStyle.NotEscaped); auto cr = lexHexString(c, i, l, StringStyle.NotEscaped);
assert (cr == "6d"); assert (cr == "6d");
auto d = `x"5e5f"d`; auto d = `x"5e5f"d`;
auto dr = lexHexString(d, i, l, StringStyle.NotEscaped | StringStyle.IncludeQuotes); auto dr = lexHexString(d, i, l, StringStyle.NotEscaped | StringStyle.IncludeQuotes);
assert (dr == `x"5e5f"d`); assert (dr == `x"5e5f"d`);
assert (dr == TokenType.DStringLiteral); assert (dr == TokenType.DStringLiteral);
} }
Token lexString(R)(ref R input, ref uint index, ref uint lineNumber, Token lexString(R)(ref R input, ref uint index, ref uint lineNumber,
@ -1582,7 +1583,7 @@ body
unittest unittest
{ {
import std.stdio; import std.stdio;
uint i; uint i;
uint l; uint l;
auto a = "q{import std.stdio;} abcd"; auto a = "q{import std.stdio;} abcd";
@ -2178,106 +2179,106 @@ unittest
} }
/*
 * Attempts to lex a `#line` special token sequence starting at the front of
 * input.
 *
 * All scanning is done on a saved copy of the range; input, index and
 * lineNumber are only updated once the whole sequence has been recognised.
 *
 * Params:
 *     input = the character range; its front must be '#'
 *     index = the current character index, advanced past the sequence on
 *         success
 *     lineNumber = the current line number, replaced on success by the
 *         value produced while lexing the sequence
 * Returns: the text of the sequence, or null if the characters at the front
 *     of input do not form a `#line` sequence (in which case input, index
 *     and lineNumber are left untouched)
 */
string lexSpecialTokenSequence(R)(ref R input, ref uint index,
	ref uint lineNumber)
in
{
	assert (input.front == '#');
}
body
{
	// Work on copies so that a failed match leaves the caller's state intact.
	auto i = index;
	auto r = input.save;
	auto l = lineNumber;
	r.popFront();
	++i;
	auto app = appender!(ElementType!(R)[])();
	app.put('#');

	// Collect the word that follows the '#'.
	auto specialType = appender!(ElementType!(R)[])();

	while (!r.empty && !isSeparating(r.front))
	{
		specialType.put(r.front);
		++i;
		r.popFront();
	}

	if (to!string(specialType.data) != "line")
		return null;
	app.put(specialType.data);

	// The input may end right after "#line", so check for emptiness before
	// looking at the front of the range.
	if (!r.empty && std.uni.isWhite(r.front))
		app.put(lexWhitespace(r, i, l).value);

	if (r.empty || !isDigit(r.front))
		return null;

	auto t = lexNumber(r, i, l);
	if (t != TokenType.IntLiteral)
		return null;

	app.put(t.value);
	l = to!uint(t.value);

	if (!isNewline(r))
	{
		if (!r.empty && std.uni.isWhite(r.front))
			app.put(lexWhitespace(r, i, l).value);

		// Anything other than a newline after the number must be a quoted
		// file specification.
		if (!r.empty && r.front == '"')
		{
			auto fSpecApp = appender!(ElementType!(R)[])();
			fSpecApp.put(r.front);
			r.popFront();
			++i;
			while (!r.empty)
			{
				if (r.front == '"')
				{
					fSpecApp.put('"');
					++i;
					r.popFront();
					break;
				}
				++i;
				fSpecApp.put(r.front);
				r.popFront();
			}
			app.put(fSpecApp.data);
		}
		else
			return null;
	}

	app.put(popNewline(r, i));
	// Commit: consume from the real input exactly what the copy consumed.
	input.popFrontN(i - index);
	index = i;
	lineNumber = l;
	return to!string(app.data);
}
unittest unittest
{ {
uint i; uint i;
uint l; uint l;
auto a = "#line 10\n"; auto a = "#line 10\n";
auto ar = lexSpecialTokenSequence(a, i, l); auto ar = lexSpecialTokenSequence(a, i, l);
assert (ar == "#line 10\n"); assert (ar == "#line 10\n");
assert (a == ""); assert (a == "");
assert (l == 10); assert (l == 10);
auto b = "#line 9201 \"test.d\"\n"; auto b = "#line 9201 \"test.d\"\n";
auto br = lexSpecialTokenSequence(b, i, l); auto br = lexSpecialTokenSequence(b, i, l);
assert (l == 9201); assert (l == 9201);
assert (br == "#line 9201 \"test.d\"\n"); assert (br == "#line 9201 \"test.d\"\n");
assert (b == ""); assert (b == "");
auto c = `#lin`; auto c = `#lin`;
auto cr = lexSpecialTokenSequence(c, i, l); auto cr = lexSpecialTokenSequence(c, i, l);
assert (l == 9201); assert (l == 9201);
assert (cr is null); assert (cr is null);
assert (c == `#lin`); assert (c == `#lin`);
} }
pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C) pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C)
@ -2477,8 +2478,8 @@ pure nothrow TokenType lookupTokenType(const string input)
class Trie(K, V) if (isInputRange!K): TrieNode!(K, V) class Trie(K, V) if (isInputRange!K): TrieNode!(K, V)
{ {
/** /**
* Adds the given value to the trie with the given key * Adds the given value to the trie with the given key
*/ */
void add(K key, V value) pure void add(K key, V value) pure
{ {
TrieNode!(K,V) current = this; TrieNode!(K,V) current = this;