parser fixes
This commit is contained in:
parent
c7b84ca0cc
commit
31f59384b9
4
build.sh
4
build.sh
|
@ -1,2 +1,2 @@
|
|||
dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
|
||||
#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest
|
||||
#dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
|
||||
dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest
|
||||
|
|
|
@ -32,7 +32,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
override T front() const @property
|
||||
override T front() @property
|
||||
{
|
||||
return data[index];
|
||||
}
|
||||
|
@ -94,6 +94,7 @@ public:
|
|||
result = dg(front);
|
||||
if (result)
|
||||
break;
|
||||
popFront();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -107,6 +108,7 @@ public:
|
|||
result = dg(i, front);
|
||||
if (result)
|
||||
break;
|
||||
popFront();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -10,8 +10,6 @@ import std.stdio;
|
|||
import std.array;
|
||||
import std.d.lexer;
|
||||
|
||||
import langutils;
|
||||
|
||||
void writeSpan(string cssClass, string value)
|
||||
{
|
||||
stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
|
||||
|
|
11
langutils.d
11
langutils.d
|
@ -6,6 +6,7 @@
|
|||
module langutils;
|
||||
|
||||
import std.array;
|
||||
import std.algorithm;
|
||||
import std.d.lexer;
|
||||
|
||||
|
||||
|
@ -43,7 +44,7 @@ string combineTokens(ref const Token[] tokens)
|
|||
return app.data;
|
||||
}
|
||||
|
||||
pure string getTypeFromToken(const Token t)
|
||||
pure nothrow string getTypeFromToken(const Token t)
|
||||
{
|
||||
switch (t.type)
|
||||
{
|
||||
|
@ -73,8 +74,14 @@ pure string getTypeFromToken(const Token t)
|
|||
}
|
||||
}
|
||||
|
||||
pure bool isIdentifierOrType(inout Token t)
|
||||
/**
 * Tells whether a token is an identifier or a built-in type keyword.
 *
 * Params:
 *     t = the token to classify
 * Returns:
 *     true if t.type is TokenType.Identifier, or lies strictly between
 *     TokenType.TYPES_BEGIN and TokenType.TYPES_END; false otherwise
 */
pure bool isIdentifierOrType(const Token t)
{
    // Both bounds must be compared against t.type. A bare
    // `&& TokenType.TYPES_END` is not a range check, so every token type
    // above TYPES_BEGIN would otherwise be accepted as a type.
    return t.type == TokenType.Identifier
        || (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END);
}
|
||||
|
||||
pure bool isDocComment(ref const Token t)
|
||||
{
|
||||
return t.value.startsWith("///") || t.value.startsWith("/**")
|
||||
|| t.value.startsWith("/++");
|
||||
}
|
||||
|
|
45
main.d
45
main.d
|
@ -15,6 +15,7 @@ import std.parallelism;
|
|||
import std.path;
|
||||
import std.regex;
|
||||
import std.stdio;
|
||||
import std.range;
|
||||
import std.d.lexer;
|
||||
|
||||
import autocomplete;
|
||||
|
@ -28,7 +29,7 @@ import circularbuffer;
|
|||
|
||||
immutable size_t CIRC_BUFF_SIZE = 4;
|
||||
|
||||
pure bool isLineOfCode(TokenType t)
|
||||
pure nothrow bool isLineOfCode(TokenType t)
|
||||
{
|
||||
switch(t)
|
||||
{
|
||||
|
@ -138,36 +139,21 @@ int main(string[] args)
|
|||
{
|
||||
if (args.length == 1)
|
||||
{
|
||||
auto f = appender!string();
|
||||
char[] buf;
|
||||
while (stdin.readln(buf))
|
||||
f.put(buf);
|
||||
writeln(f.data.byToken().count!(a => isLineOfCode(a.type))());
|
||||
writeln(stdin.byLine(KeepTerminator.yes).join().byToken().count!(a => isLineOfCode(a.type))());
|
||||
}
|
||||
else
|
||||
{
|
||||
writeln(args[1..$].map!(a => a.readText().byToken())().joiner()
|
||||
.count!(a => isLineOfCode(a.type))());
|
||||
writeln(args[1..$].map!(a => File(a).byLine(KeepTerminator.yes).join().byToken())()
|
||||
.joiner().count!(a => isLineOfCode(a.type))());
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (highlight)
|
||||
{
|
||||
if (args.length == 1)
|
||||
{
|
||||
auto f = appender!string();
|
||||
char[] buf;
|
||||
while (stdin.readln(buf))
|
||||
f.put(buf);
|
||||
highlighter.highlight(f.data.byToken(IterationStyle.Everything,
|
||||
StringStyle.Source));
|
||||
}
|
||||
else
|
||||
{
|
||||
highlighter.highlight(args[1].readText().byToken(
|
||||
File f = args.length == 1 ? stdin : File(args[1]);
|
||||
highlighter.highlight(f.byLine(KeepTerminator.yes).join().byToken(
|
||||
IterationStyle.Everything, StringStyle.Source));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -213,20 +199,9 @@ int main(string[] args)
|
|||
if (json)
|
||||
{
|
||||
CircularBuffer!(Token) tokens;
|
||||
if (args.length == 1)
|
||||
{
|
||||
// Read from stdin
|
||||
auto f = appender!string();
|
||||
char[] buf;
|
||||
while (stdin.readln(buf))
|
||||
f.put(buf);
|
||||
tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(f.data));
|
||||
}
|
||||
else
|
||||
{
|
||||
// read given file
|
||||
tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(readText(args[1])));
|
||||
}
|
||||
File f = args.length == 1 ? stdin : File(args[1]);
|
||||
tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE,
|
||||
f.byLine(KeepTerminator.yes).join().byToken!(char[])());
|
||||
auto mod = parseModule(tokens);
|
||||
mod.writeJSONTo(stdout);
|
||||
return 0;
|
||||
|
|
13
parser.d
13
parser.d
|
@ -26,6 +26,7 @@ public:
|
|||
this(InputRange!Token tokens, TokenType open, TokenType close)
|
||||
{
|
||||
super(0, tokens);
|
||||
this.range = tokens;
|
||||
this.open = open;
|
||||
this.close = close;
|
||||
}
|
||||
|
@ -35,7 +36,7 @@ public:
|
|||
return _empty;
|
||||
}
|
||||
|
||||
override Token front() const @property
|
||||
override Token front() @property
|
||||
{
|
||||
return range.front;
|
||||
}
|
||||
|
@ -47,14 +48,20 @@ public:
|
|||
++depth;
|
||||
else if (range.front == close)
|
||||
--depth;
|
||||
_empty = depth == 0;
|
||||
_empty = depth == 0 || range.empty;
|
||||
}
|
||||
|
||||
invariant()
|
||||
{
|
||||
assert (range);
|
||||
assert (depth >= 0);
|
||||
}
|
||||
|
||||
private:
|
||||
int depth;
|
||||
TokenType open;
|
||||
TokenType close;
|
||||
TokenBuffer range;
|
||||
InputRange!(Token) range;
|
||||
bool _empty;
|
||||
}
|
||||
|
||||
|
|
217
std/d/lexer.d
217
std/d/lexer.d
|
@ -1,71 +1,71 @@
|
|||
// Written in the D programming language
|
||||
|
||||
/**
|
||||
* This module contains a range-based lexer for the D programming language.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* Generate HTML markup of D code.
|
||||
* ---
|
||||
* import std.stdio;
|
||||
* import std.array;
|
||||
* import std.file;
|
||||
* import std.d.lexer;
|
||||
*
|
||||
* void writeSpan(string cssClass, string value)
|
||||
* {
|
||||
* stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
|
||||
* }
|
||||
*
|
||||
* void highlight(R)(R tokens)
|
||||
* {
|
||||
* stdout.writeln(q"[<!DOCTYPE html>
|
||||
* <html>
|
||||
* <head>
|
||||
* <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
|
||||
* <body>
|
||||
* <style type="text/css">
|
||||
* html { background-color: #fff; color: #222; }
|
||||
* .kwrd { font-weight: bold; color: blue; }
|
||||
* .com { color: green; font-style: italic;}
|
||||
* .num { color: orangered; font-weight: bold; }
|
||||
* .str { color: red; font-style: italic; }
|
||||
* .op { color: #333; font-weight: bold; }
|
||||
* .type { color: magenta; font-weight: bold; }
|
||||
* </style>
|
||||
* <pre>]");
|
||||
*
|
||||
* foreach (Token t; tokens)
|
||||
* {
|
||||
* if (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END)
|
||||
* writeSpan("type", t.value);
|
||||
* else if (t.type > TokenType.KEYWORDS_BEGIN && t.type < TokenType.KEYWORDS_END)
|
||||
* writeSpan("kwrd", t.value);
|
||||
* else if (t.type == TokenType.Comment)
|
||||
* writeSpan("com", t.value);
|
||||
* else if (t.type > TokenType.STRINGS_BEGIN && t.type < TokenType.STRINGS_END)
|
||||
* writeSpan("str", t.value);
|
||||
* else if (t.type > TokenType.NUMBERS_BEGIN && t.type < TokenType.NUMBERS_END)
|
||||
* writeSpan("num", t.value);
|
||||
* else if (t.type > TokenType.OPERATORS_BEGIN && t.type < TokenType.OPERATORS_END)
|
||||
* writeSpan("op", t.value);
|
||||
* else
|
||||
* stdout.write(t.value.replace("<", "&lt;"));
|
||||
* }
|
||||
* stdout.writeln("</pre>\n</body></html>");
|
||||
* }
|
||||
*
|
||||
* void main(string[] args)
|
||||
* {
|
||||
* args[1].readText().byToken(IterationStyle.Everything, StringStyle.Source).highlight();
|
||||
* }
|
||||
* ---
|
||||
*
|
||||
* Copyright: Brian Schott 2013
|
||||
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
|
||||
* Authors: Brian Schott
|
||||
* Source: $(PHOBOSSRC std/d/_lexer.d)
|
||||
*/
|
||||
* This module contains a range-based lexer for the D programming language.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* Generate HTML markup of D code.
|
||||
* ---
|
||||
* import std.stdio;
|
||||
* import std.array;
|
||||
* import std.file;
|
||||
* import std.d.lexer;
|
||||
*
|
||||
* void writeSpan(string cssClass, string value)
|
||||
* {
|
||||
* stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
|
||||
* }
|
||||
*
|
||||
* void highlight(R)(R tokens)
|
||||
* {
|
||||
* stdout.writeln(q"[<!DOCTYPE html>
|
||||
* <html>
|
||||
* <head>
|
||||
* <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
|
||||
* <body>
|
||||
* <style type="text/css">
|
||||
* html { background-color: #fff; color: #222; }
|
||||
* .kwrd { font-weight: bold; color: blue; }
|
||||
* .com { color: green; font-style: italic;}
|
||||
* .num { color: orangered; font-weight: bold; }
|
||||
* .str { color: red; font-style: italic; }
|
||||
* .op { color: #333; font-weight: bold; }
|
||||
* .type { color: magenta; font-weight: bold; }
|
||||
* </style>
|
||||
* <pre>]");
|
||||
*
|
||||
* foreach (Token t; tokens)
|
||||
* {
|
||||
* if (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END)
|
||||
* writeSpan("type", t.value);
|
||||
* else if (t.type > TokenType.KEYWORDS_BEGIN && t.type < TokenType.KEYWORDS_END)
|
||||
* writeSpan("kwrd", t.value);
|
||||
* else if (t.type == TokenType.Comment)
|
||||
* writeSpan("com", t.value);
|
||||
* else if (t.type > TokenType.STRINGS_BEGIN && t.type < TokenType.STRINGS_END)
|
||||
* writeSpan("str", t.value);
|
||||
* else if (t.type > TokenType.NUMBERS_BEGIN && t.type < TokenType.NUMBERS_END)
|
||||
* writeSpan("num", t.value);
|
||||
* else if (t.type > TokenType.OPERATORS_BEGIN && t.type < TokenType.OPERATORS_END)
|
||||
* writeSpan("op", t.value);
|
||||
* else
|
||||
* stdout.write(t.value.replace("<", "&lt;"));
|
||||
* }
|
||||
* stdout.writeln("</pre>\n</body></html>");
|
||||
* }
|
||||
*
|
||||
* void main(string[] args)
|
||||
* {
|
||||
* args[1].readText().byToken(IterationStyle.Everything, StringStyle.Source).highlight();
|
||||
* }
|
||||
* ---
|
||||
*
|
||||
* Copyright: Brian Schott 2013
|
||||
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
|
||||
* Authors: Brian Schott
|
||||
* Source: $(PHOBOSSRC std/d/_lexer.d)
|
||||
*/
|
||||
|
||||
module std.d.lexer;
|
||||
|
||||
|
@ -81,8 +81,8 @@ import std.d.entities;
|
|||
public:
|
||||
|
||||
/**
|
||||
* Represents a D token
|
||||
*/
|
||||
* Represents a D token
|
||||
*/
|
||||
struct Token
|
||||
{
|
||||
/// The token type.
|
||||
|
@ -129,8 +129,8 @@ struct Token
|
|||
}
|
||||
|
||||
/**
|
||||
* Configure the behavior of the byToken() function
|
||||
*/
|
||||
* Configure the behavior of the byToken() function
|
||||
*/
|
||||
enum IterationStyle
|
||||
{
|
||||
/// Only include code, not whitespace or comments
|
||||
|
@ -146,8 +146,8 @@ enum IterationStyle
|
|||
}
|
||||
|
||||
/**
|
||||
* Configuration of the string lexing style
|
||||
*/
|
||||
* Configuration of the string lexing style
|
||||
*/
|
||||
enum StringStyle : uint
|
||||
{
|
||||
/**
|
||||
|
@ -181,16 +181,17 @@ enum StringStyle : uint
|
|||
}
|
||||
|
||||
/**
|
||||
* Iterate over the given range of characters by D tokens.
|
||||
* Params:
|
||||
* range = the range of characters
|
||||
* iterationStyle = See IterationStyle
|
||||
* stringStyle = see StringStyle
|
||||
* Returns:
|
||||
* an input range of tokens
|
||||
*/
|
||||
* Iterate over the given range of characters by D tokens.
|
||||
* Params:
|
||||
* range = the range of characters
|
||||
* iterationStyle = See IterationStyle
|
||||
* stringStyle = see StringStyle
|
||||
* Returns:
|
||||
* an input range of tokens
|
||||
*/
|
||||
TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly,
|
||||
const StringStyle stringStyle = StringStyle.Default) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
|
||||
const StringStyle stringStyle = StringStyle.Default)
|
||||
if (isForwardRange!(R) && (isSomeChar!(ElementType!(R)) || is (ElementType!(R) == ubyte)))
|
||||
{
|
||||
auto r = new TokenRange!(R)(range);
|
||||
r.stringStyle = stringStyle;
|
||||
|
@ -201,9 +202,9 @@ TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = Iterati
|
|||
}
|
||||
|
||||
/**
|
||||
* Range of tokens. Avoid creating instances of this manually. Use
|
||||
* $(DDOC_PSYMBOL byToken$(LPAREN)$(RPAREN)) instead, as it does some initialization work.
|
||||
*/
|
||||
* Range of tokens. Avoid creating instances of this manually. Use
|
||||
* $(DDOC_PSYMBOL byToken$(LPAREN)$(RPAREN)) instead, as it does some initialization work.
|
||||
*/
|
||||
class TokenRange(R) : InputRange!(Token)
|
||||
{
|
||||
this(ref R range)
|
||||
|
@ -504,31 +505,31 @@ unittest
|
|||
}
|
||||
|
||||
/**
|
||||
* Listing of all the tokens in the D language.
|
||||
*
|
||||
* Token types are arranged so that it is easy to group tokens while iterating
|
||||
* over them. For example:
|
||||
* ---
|
||||
* assert(TokenType.Increment < TokenType.OPERATORS_END);
|
||||
* assert(TokenType.Increment > TokenType.OPERATORS_BEGIN);
|
||||
* ---
|
||||
* The non-token values are documented below:
|
||||
*
|
||||
* $(BOOKTABLE ,
|
||||
* $(TR $(TH Begin) $(TH End) $(TH Content) $(TH Examples))
|
||||
* $(TR $(TD OPERATORS_BEGIN) $(TD OPERATORS_END) $(TD operators) $(TD +, -, <<=))
|
||||
* $(TR $(TD TYPES_BEGIN) $(TD TYPES_END) $(TD types) $(TD bool, char, double))
|
||||
* $(TR $(TD KEYWORDS_BEGIN) $(TD KEYWORDS_END) $(TD keywords) $(TD class, if, assert))
|
||||
* $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD attributes) $(TD override synchronized, __gshared))
|
||||
* $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD protection) $(TD public, protected))
|
||||
* $(TR $(TD CONSTANTS_BEGIN) $(TD CONSTANTS_END) $(TD compile-time constants) $(TD __FILE__, __TIME__))
|
||||
* $(TR $(TD LITERALS_BEGIN) $(TD LITERALS_END) $(TD string and numeric literals) $(TD "str", 123))
|
||||
* $(TR $(TD NUMBERS_BEGIN) $(TD NUMBERS_END) $(TD numeric literals) $(TD 0x123p+9, 0b0110))
|
||||
* $(TR $(TD STRINGS_BEGIN) $(TD STRINGS_END) $(TD string literals) $(TD `123`c, q{tokens;}, "abcde"))
|
||||
* $(TR $(TD MISC_BEGIN) $(TD MISC_END) $(TD anything else) $(TD whitespace, comments, identifiers))
|
||||
* )
|
||||
* Note that several of the above ranges overlap.
|
||||
*/
|
||||
* Listing of all the tokens in the D language.
|
||||
*
|
||||
* Token types are arranged so that it is easy to group tokens while iterating
|
||||
* over them. For example:
|
||||
* ---
|
||||
* assert(TokenType.Increment < TokenType.OPERATORS_END);
|
||||
* assert(TokenType.Increment > TokenType.OPERATORS_BEGIN);
|
||||
* ---
|
||||
* The non-token values are documented below:
|
||||
*
|
||||
* $(BOOKTABLE ,
|
||||
* $(TR $(TH Begin) $(TH End) $(TH Content) $(TH Examples))
|
||||
* $(TR $(TD OPERATORS_BEGIN) $(TD OPERATORS_END) $(TD operators) $(TD +, -, <<=))
|
||||
* $(TR $(TD TYPES_BEGIN) $(TD TYPES_END) $(TD types) $(TD bool, char, double))
|
||||
* $(TR $(TD KEYWORDS_BEGIN) $(TD KEYWORDS_END) $(TD keywords) $(TD class, if, assert))
|
||||
* $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD attributes) $(TD override synchronized, __gshared))
|
||||
* $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD protection) $(TD public, protected))
|
||||
* $(TR $(TD CONSTANTS_BEGIN) $(TD CONSTANTS_END) $(TD compile-time constants) $(TD __FILE__, __TIME__))
|
||||
* $(TR $(TD LITERALS_BEGIN) $(TD LITERALS_END) $(TD string and numeric literals) $(TD "str", 123))
|
||||
* $(TR $(TD NUMBERS_BEGIN) $(TD NUMBERS_END) $(TD numeric literals) $(TD 0x123p+9, 0b0110))
|
||||
* $(TR $(TD STRINGS_BEGIN) $(TD STRINGS_END) $(TD string literals) $(TD `123`c, q{tokens;}, "abcde"))
|
||||
* $(TR $(TD MISC_BEGIN) $(TD MISC_END) $(TD anything else) $(TD whitespace, comments, identifiers))
|
||||
* )
|
||||
* Note that several of the above ranges overlap.
|
||||
*/
|
||||
enum TokenType: uint
|
||||
{
|
||||
// Operators
|
||||
|
|
Loading…
Reference in New Issue