parser fixes

2013-01-27 14:00:14 -08:00 · 2013-01-27 14:00:14 -08:00 · 31f59384b9
parent c7b84ca0cc
commit 31f59384b9
7 changed files with 449 additions and 459 deletions
--- a/build.sh
+++ b/build.sh
@ -1,2 +1,2 @@
-dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
+#dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
-#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest
+dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest
--- a/circularbuffer.d
+++ b/circularbuffer.d
@ -32,7 +32,7 @@ public:
 		}
 	}
-    override T front() const @property
+	override T front() @property
 	{
 		return data[index];
 	}
@ -94,6 +94,7 @@ public:
 			result = dg(front);
 			if (result)
 				break;
            popFront();
 		}
 		return result;
 	}
@ -107,6 +108,7 @@ public:
 			result = dg(i, front);
 			if (result)
 				break;
            popFront();
 		}
 		return result;
 	}
--- a/highlighter.d
+++ b/highlighter.d
@ -10,8 +10,6 @@ import std.stdio;
 import std.array;
 import std.d.lexer;
 import langutils;
 void writeSpan(string cssClass, string value)
 {
 	stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
--- a/langutils.d
+++ b/langutils.d
@ -6,6 +6,7 @@
 module langutils;
 import std.array;
 import std.algorithm;
 import std.d.lexer;
@ -43,7 +44,7 @@ string combineTokens(ref const Token[] tokens)
 	return app.data;
 }
-pure string getTypeFromToken(const Token t)
+pure nothrow string getTypeFromToken(const Token t)
 {
 	switch (t.type)
 	{
@ -73,8 +74,14 @@ pure string getTypeFromToken(const Token t)
 	}
 }
-pure bool isIdentifierOrType(inout Token t)
+pure bool isIdentifierOrType(const Token t)
 {
 	// True when the token is an identifier, or when its type lies strictly
 	// between the TYPES_BEGIN and TYPES_END markers. Both bounds must be
 	// compared against t.type: a bare `TokenType.TYPES_END` operand tests
 	// only the constant itself and never constrains the token's type.
 	return t.type == TokenType.Identifier || (t.type > TokenType.TYPES_BEGIN
 		&& t.type < TokenType.TYPES_END);
 }
 /// Returns: true if the token's value starts with "///", "/**" or "/++".
 pure bool isDocComment(ref const Token t)
 {
    return t.value.startsWith("///") || t.value.startsWith("/**")
        || t.value.startsWith("/++");
 }
--- a/main.d
+++ b/main.d
@ -15,6 +15,7 @@ import std.parallelism;
 import std.path;
 import std.regex;
 import std.stdio;
 import std.range;
 import std.d.lexer;
 import autocomplete;
@ -28,7 +29,7 @@ import circularbuffer;
 immutable size_t CIRC_BUFF_SIZE = 4;
-pure bool isLineOfCode(TokenType t)
+pure nothrow bool isLineOfCode(TokenType t)
 {
 	switch(t)
 	{
@ -138,36 +139,21 @@ int main(string[] args)
 	{
 		if (args.length == 1)
 		{
-			auto f = appender!string();
+			writeln(stdin.byLine(KeepTerminator.yes).join().byToken().count!(a => isLineOfCode(a.type))());
 			char[] buf;
 			while (stdin.readln(buf))
 				f.put(buf);
 			writeln(f.data.byToken().count!(a => isLineOfCode(a.type))());
 		}
 		else
 		{
-			writeln(args[1..$].map!(a => a.readText().byToken())().joiner()
+			writeln(args[1..$].map!(a => File(a).byLine(KeepTerminator.yes).join().byToken())()
-				.count!(a => isLineOfCode(a.type))());
+                .joiner().count!(a => isLineOfCode(a.type))());
 		}
 		return 0;
 	}
 	if (highlight)
 	{
-		if (args.length == 1)
+        File f = args.length == 1 ? stdin : File(args[1]);
-		{
+        highlighter.highlight(f.byLine(KeepTerminator.yes).join().byToken(
 			auto f = appender!string();
 			char[] buf;
 			while (stdin.readln(buf))
 				f.put(buf);
 			highlighter.highlight(f.data.byToken(IterationStyle.Everything,
 				StringStyle.Source));
 		}
 		else
 		{
 			highlighter.highlight(args[1].readText().byToken(
            IterationStyle.Everything, StringStyle.Source));
 		}
 		return 0;
 	}
@ -213,20 +199,9 @@ int main(string[] args)
 	if (json)
 	{
 		CircularBuffer!(Token) tokens;
-		if (args.length == 1)
+        File f = args.length == 1 ? stdin : File(args[1]);
-		{
+        tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE,
-			// Read from stdin
+            f.byLine(KeepTerminator.yes).join().byToken!(char[])());
 			auto f = appender!string();
 			char[] buf;
 			while (stdin.readln(buf))
 				f.put(buf);
 			tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(f.data));
 		}
 		else
 		{
 			// read given file
 			tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(readText(args[1])));
 		}
 		auto mod = parseModule(tokens);
 		mod.writeJSONTo(stdout);
 		return 0;
--- a/parser.d
+++ b/parser.d
@ -26,6 +26,7 @@ public:
 	this(InputRange!Token tokens, TokenType open, TokenType close)
 	{
 		super(0, tokens);
        this.range = tokens;
 		this.open = open;
 		this.close = close;
 	}
@ -35,7 +36,7 @@ public:
 		return _empty;
 	}
-	override Token front() const @property
+	override Token front() @property
 	{
 		return range.front;
 	}
@ -47,14 +48,20 @@ public:
 			++depth;
 		else if (range.front == close)
 			--depth;
-		_empty = depth == 0;
+		_empty = depth == 0 || range.empty;
 	}
    invariant()
    {
        assert (range);
        assert (depth >= 0);
    }
 private:
 	int depth;
 	TokenType open;
 	TokenType close;
-	TokenBuffer range;
+	InputRange!(Token) range;
 	bool _empty;
 }
--- a/std/d/lexer.d
+++ b/std/d/lexer.d
@ -1,71 +1,71 @@
 // Written in the D programming language
 /**
- * This module contains a range-based lexer for the D programming language.
+* This module contains a range-based lexer for the D programming language.
- *
+*
- * Examples:
+* Examples:
- *
+*
- * Generate HTML markup of D code.
+* Generate HTML markup of D code.
- * ---
+* ---
- * import std.stdio;
+* import std.stdio;
- * import std.array;
+* import std.array;
- * import std.file;
+* import std.file;
- * import std.d.lexer;
+* import std.d.lexer;
- *
+*
- * void writeSpan(string cssClass, string value)
+* void writeSpan(string cssClass, string value)
- * {
+* {
- * 	stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
+* 	stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
- * }
+* }
- *
+*
- * void highlight(R)(R tokens)
+* void highlight(R)(R tokens)
- * {
+* {
- * 	stdout.writeln(q"[<!DOCTYPE html>
+* 	stdout.writeln(q"[<!DOCTYPE html>
- * <html>
+* <html>
- * <head>
+* <head>
- * <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
+* <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
- * <body>
+* <body>
- * <style type="text/css">
+* <style type="text/css">
- * html { background-color: #fff; color: #222; }
+* html { background-color: #fff; color: #222; }
- * .kwrd { font-weight: bold; color: blue; }
+* .kwrd { font-weight: bold; color: blue; }
- * .com { color: green; font-style: italic;}
+* .com { color: green; font-style: italic;}
- * .num { color: orangered; font-weight: bold; }
+* .num { color: orangered; font-weight: bold; }
- * .str { color: red; font-style: italic; }
+* .str { color: red; font-style: italic; }
- * .op { color: #333; font-weight: bold; }
+* .op { color: #333; font-weight: bold; }
- * .type { color: magenta; font-weight: bold; }
+* .type { color: magenta; font-weight: bold; }
- * </style>
+* </style>
- * <pre>]");
+* <pre>]");
- *
+*
- * 	foreach (Token t; tokens)
+* 	foreach (Token t; tokens)
- * 	{
+* 	{
- * 		if (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END)
+* 		if (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END)
- * 			writeSpan("type", t.value);
+* 			writeSpan("type", t.value);
- * 		else if (t.type > TokenType.KEYWORDS_BEGIN && t.type < TokenType.KEYWORDS_END)
+* 		else if (t.type > TokenType.KEYWORDS_BEGIN && t.type < TokenType.KEYWORDS_END)
- * 			writeSpan("kwrd", t.value);
+* 			writeSpan("kwrd", t.value);
- * 		else if (t.type == TokenType.Comment)
+* 		else if (t.type == TokenType.Comment)
- * 			writeSpan("com", t.value);
+* 			writeSpan("com", t.value);
- * 		else if (t.type > TokenType.STRINGS_BEGIN && t.type < TokenType.STRINGS_END)
+* 		else if (t.type > TokenType.STRINGS_BEGIN && t.type < TokenType.STRINGS_END)
- * 			writeSpan("str", t.value);
+* 			writeSpan("str", t.value);
- * 		else if (t.type > TokenType.NUMBERS_BEGIN && t.type < TokenType.NUMBERS_END)
+* 		else if (t.type > TokenType.NUMBERS_BEGIN && t.type < TokenType.NUMBERS_END)
- * 			writeSpan("num", t.value);
+* 			writeSpan("num", t.value);
- * 		else if (t.type > TokenType.OPERATORS_BEGIN && t.type < TokenType.OPERATORS_END)
+* 		else if (t.type > TokenType.OPERATORS_BEGIN && t.type < TokenType.OPERATORS_END)
- * 			writeSpan("op", t.value);
+* 			writeSpan("op", t.value);
- * 		else
+* 		else
- * 			stdout.write(t.value.replace("<", "&lt;"));
+* 			stdout.write(t.value.replace("<", "&lt;"));
- * 	}
+* 	}
- * 	stdout.writeln("</pre>\n</body></html>");
+* 	stdout.writeln("</pre>\n</body></html>");
- * }
+* }
- *
+*
- * void main(string[] args)
+* void main(string[] args)
- * {
+* {
- *     args[1].readText().byToken(IterationStyle.Everything, StringStyle.Source).highlight();
+*     args[1].readText().byToken(IterationStyle.Everything, StringStyle.Source).highlight();
- * }
+* }
- * ---
+* ---
- *
+*
- * Copyright: Brian Schott 2013
+* Copyright: Brian Schott 2013
- * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
+* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
- * Authors: Brian Schott
+* Authors: Brian Schott
- * Source: $(PHOBOSSRC std/d/_lexer.d)
+* Source: $(PHOBOSSRC std/d/_lexer.d)
- */
+*/
 module std.d.lexer;
@ -81,8 +81,8 @@ import std.d.entities;
 public:
 /**
- * Represents a D token
+* Represents a D token
- */
+*/
 struct Token
 {
 	/// The token type.
@ -129,8 +129,8 @@ struct Token
 }
 /**
- * Configure the behavior of the byToken() function
+* Configure the behavior of the byToken() function
- */
+*/
 enum IterationStyle
 {
 	/// Only include code, not whitespace or comments
@ -146,8 +146,8 @@ enum IterationStyle
 }
 /**
- * Configuration of the string lexing style
+* Configuration of the string lexing style
- */
+*/
 enum StringStyle : uint
 {
 	/**
@ -181,16 +181,17 @@ enum StringStyle : uint
 }
 /**
- * Iterate over the given range of characters by D tokens.
+* Iterate over the given range of characters by D tokens.
- * Params:
+* Params:
- *     range = the range of characters
+*     range = the range of characters
- *     iterationStyle = See IterationStyle
+*     iterationStyle = See IterationStyle
- *     stringStyle = see StringStyle
+*     stringStyle = see StringStyle
- * Returns:
+* Returns:
- *     an input range of tokens
+*     an input range of tokens
- */
+*/
 TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly,
-	const StringStyle stringStyle = StringStyle.Default) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
+	const StringStyle stringStyle = StringStyle.Default)
 	if (isForwardRange!(R) && (isSomeChar!(ElementType!(R)) || is (ElementType!(R) == ubyte)))
 {
 	auto r = new TokenRange!(R)(range);
 	r.stringStyle = stringStyle;
@ -201,9 +202,9 @@ TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = Iterati
 }
 /**
- * Range of tokens. Avoid creating instances of this manually. Use
+* Range of tokens. Avoid creating instances of this manually. Use
- * $(DDOC_PSYMBOL byToken$(LPAREN)$(RPAREN)) instead, as it does some initialization work.
+* $(DDOC_PSYMBOL byToken$(LPAREN)$(RPAREN)) instead, as it does some initialization work.
- */
+*/
 class TokenRange(R) : InputRange!(Token)
 {
 	this(ref R range)
@ -504,31 +505,31 @@ unittest
 }
 /**
- * Listing of all the tokens in the D language.
+* Listing of all the tokens in the D language.
- *
+*
- * Token types are arranged so that it is easy to group tokens while iterating
+* Token types are arranged so that it is easy to group tokens while iterating
- * over them. For example:
+* over them. For example:
- * ---
+* ---
- * assert(TokenType.Increment < TokenType.OPERATORS_END);
+* assert(TokenType.Increment < TokenType.OPERATORS_END);
- * assert(TokenType.Increment > TokenType.OPERATORS_BEGIN);
+* assert(TokenType.Increment > TokenType.OPERATORS_BEGIN);
- * ---
+* ---
- * The non-token values are documented below:
+* The non-token values are documented below:
- *
+*
- * $(BOOKTABLE ,
+* $(BOOKTABLE ,
- *     $(TR $(TH Begin) $(TH End) $(TH Content) $(TH Examples))
+*     $(TR $(TH Begin) $(TH End) $(TH Content) $(TH Examples))
- *     $(TR $(TD OPERATORS_BEGIN) $(TD OPERATORS_END) $(TD operators) $(TD +, -, <<=))
+*     $(TR $(TD OPERATORS_BEGIN) $(TD OPERATORS_END) $(TD operators) $(TD +, -, <<=))
- *     $(TR $(TD TYPES_BEGIN) $(TD TYPES_END) $(TD types) $(TD bool, char, double))
+*     $(TR $(TD TYPES_BEGIN) $(TD TYPES_END) $(TD types) $(TD bool, char, double))
- *     $(TR $(TD KEYWORDS_BEGIN) $(TD KEYWORDS_END) $(TD keywords) $(TD class, if, assert))
+*     $(TR $(TD KEYWORDS_BEGIN) $(TD KEYWORDS_END) $(TD keywords) $(TD class, if, assert))
- *     $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD attributes) $(TD override synchronized, __gshared))
+*     $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD attributes) $(TD override synchronized, __gshared))
- *     $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD protection) $(TD public, protected))
+*     $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD protection) $(TD public, protected))
- *     $(TR $(TD CONSTANTS_BEGIN) $(TD CONSTANTS_END) $(TD compile-time constants) $(TD __FILE__, __TIME__))
+*     $(TR $(TD CONSTANTS_BEGIN) $(TD CONSTANTS_END) $(TD compile-time constants) $(TD __FILE__, __TIME__))
- *     $(TR $(TD LITERALS_BEGIN) $(TD LITERALS_END) $(TD string and numeric literals) $(TD "str", 123))
+*     $(TR $(TD LITERALS_BEGIN) $(TD LITERALS_END) $(TD string and numeric literals) $(TD "str", 123))
- *     $(TR $(TD NUMBERS_BEGIN) $(TD NUMBERS_END) $(TD numeric literals) $(TD 0x123p+9, 0b0110))
+*     $(TR $(TD NUMBERS_BEGIN) $(TD NUMBERS_END) $(TD numeric literals) $(TD 0x123p+9, 0b0110))
- *     $(TR $(TD STRINGS_BEGIN) $(TD STRINGS_END) $(TD string literals) $(TD `123`c, q{tokens;}, "abcde"))
+*     $(TR $(TD STRINGS_BEGIN) $(TD STRINGS_END) $(TD string literals) $(TD `123`c, q{tokens;}, "abcde"))
- *     $(TR $(TD MISC_BEGIN) $(TD MISC_END) $(TD anything else) $(TD whitespace, comments, identifiers))
+*     $(TR $(TD MISC_BEGIN) $(TD MISC_END) $(TD anything else) $(TD whitespace, comments, identifiers))
- * )
+* )
- * Note that several of the above ranges overlap.
+* Note that several of the above ranges overlap.
- */
+*/
 enum TokenType: uint
 {
 	// Operators
`@ -1,2 +1,2 @@`
	`dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline`	`#dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline`
	`#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest`	`dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest`