parser fixes

2013-01-27 14:00:14 -08:00 · 2013-01-27 14:00:14 -08:00 · 31f59384b9
parent c7b84ca0cc
commit 31f59384b9
7 changed files with 449 additions and 459 deletions
--- a/build.sh
+++ b/build.sh
@ -1,2 +1,2 @@
-dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
-#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest
+#dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
+dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest
--- a/circularbuffer.d
+++ b/circularbuffer.d
@ -32,7 +32,7 @@ public:
 		}
 	}

-    override T front() const @property
+	override T front() @property
 	{
 		return data[index];
 	}
@ -94,6 +94,7 @@ public:
 			result = dg(front);
 			if (result)
 				break;
+            popFront();
 		}
 		return result;
 	}
@ -107,6 +108,7 @@ public:
 			result = dg(i, front);
 			if (result)
 				break;
+            popFront();
 		}
 		return result;
 	}
--- a/highlighter.d
+++ b/highlighter.d
@ -10,8 +10,6 @@ import std.stdio;
 import std.array;
 import std.d.lexer;

-import langutils;
-
 void writeSpan(string cssClass, string value)
 {
 	stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
--- a/langutils.d
+++ b/langutils.d
@ -6,6 +6,7 @@
 module langutils;

 import std.array;
+import std.algorithm;
 import std.d.lexer;


@ -43,7 +44,7 @@ string combineTokens(ref const Token[] tokens)
 	return app.data;
 }

-pure string getTypeFromToken(const Token t)
+pure nothrow string getTypeFromToken(const Token t)
 {
 	switch (t.type)
 	{
@ -73,8 +74,29 @@ pure string getTypeFromToken(const Token t)
 	}
 }

-pure bool isIdentifierOrType(inout Token t)
+/**
+ * Tests whether a token is an identifier or a type token.
+ * Params:
+ *     t = the token to classify
+ * Returns:
+ *     true if t.type is TokenType.Identifier or lies strictly between
+ *     TokenType.TYPES_BEGIN and TokenType.TYPES_END
+ */
+pure bool isIdentifierOrType(const Token t)
 {
 	return t.type == TokenType.Identifier || (t.type > TokenType.TYPES_BEGIN
-		&& TokenType.TYPES_END);
+		&& t.type < TokenType.TYPES_END);
 }
+
+/**
+ * Tests whether a token's text begins with a documentation-comment opener.
+ * Params:
+ *     t = the token whose value is inspected
+ * Returns:
+ *     true if t.value starts with "///", "/**" or "/++"
+ */
+pure bool isDocComment(ref const Token t)
+{
+	return t.value.startsWith("///") || t.value.startsWith("/**")
+		|| t.value.startsWith("/++");
+}
--- a/main.d
+++ b/main.d
@ -15,6 +15,7 @@ import std.parallelism;
 import std.path;
 import std.regex;
 import std.stdio;
+import std.range;
 import std.d.lexer;

 import autocomplete;
@ -28,7 +29,7 @@ import circularbuffer;

 immutable size_t CIRC_BUFF_SIZE = 4;

-pure bool isLineOfCode(TokenType t)
+pure nothrow bool isLineOfCode(TokenType t)
 {
 	switch(t)
 	{
@ -138,36 +139,21 @@ int main(string[] args)
 	{
 		if (args.length == 1)
 		{
-			auto f = appender!string();
-			char[] buf;
-			while (stdin.readln(buf))
-				f.put(buf);
-			writeln(f.data.byToken().count!(a => isLineOfCode(a.type))());
+			writeln(stdin.byLine(KeepTerminator.yes).join().byToken().count!(a => isLineOfCode(a.type))());
 		}
 		else
 		{
-			writeln(args[1..$].map!(a => a.readText().byToken())().joiner()
-				.count!(a => isLineOfCode(a.type))());
+			writeln(args[1..$].map!(a => File(a).byLine(KeepTerminator.yes).join().byToken())()
+                .joiner().count!(a => isLineOfCode(a.type))());
 		}
 		return 0;
 	}

 	if (highlight)
 	{
-		if (args.length == 1)
-		{
-			auto f = appender!string();
-			char[] buf;
-			while (stdin.readln(buf))
-				f.put(buf);
-			highlighter.highlight(f.data.byToken(IterationStyle.Everything,
-				StringStyle.Source));
-		}
-		else
-		{
-			highlighter.highlight(args[1].readText().byToken(
+        File f = args.length == 1 ? stdin : File(args[1]);
+        highlighter.highlight(f.byLine(KeepTerminator.yes).join().byToken(
            IterationStyle.Everything, StringStyle.Source));
-		}
 		return 0;
 	}

@ -213,20 +199,9 @@ int main(string[] args)
 	if (json)
 	{
 		CircularBuffer!(Token) tokens;
-		if (args.length == 1)
-		{
-			// Read from stdin
-			auto f = appender!string();
-			char[] buf;
-			while (stdin.readln(buf))
-				f.put(buf);
-			tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(f.data));
-		}
-		else
-		{
-			// read given file
-			tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(readText(args[1])));
-		}
+        File f = args.length == 1 ? stdin : File(args[1]);
+        tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE,
+            f.byLine(KeepTerminator.yes).join().byToken!(char[])());
 		auto mod = parseModule(tokens);
 		mod.writeJSONTo(stdout);
 		return 0;
--- a/parser.d
+++ b/parser.d
@ -26,6 +26,7 @@ public:
 	this(InputRange!Token tokens, TokenType open, TokenType close)
 	{
 		super(0, tokens);
+        this.range = tokens;
 		this.open = open;
 		this.close = close;
 	}
@ -35,7 +36,7 @@ public:
 		return _empty;
 	}

-	override Token front() const @property
+	override Token front() @property
 	{
 		return range.front;
 	}
@ -47,14 +48,20 @@ public:
 			++depth;
 		else if (range.front == close)
 			--depth;
-		_empty = depth == 0;
+		_empty = depth == 0 || range.empty;
+	}
+
+    invariant()
+    {
+        assert (range);
+        assert (depth >= 0);
    }

 private:
 	int depth;
 	TokenType open;
 	TokenType close;
-	TokenBuffer range;
+	InputRange!(Token) range;
 	bool _empty;
 }

--- a/std/d/lexer.d
+++ b/std/d/lexer.d
@ -1,71 +1,71 @@
 // Written in the D programming language

 /**
- * This module contains a range-based lexer for the D programming language.
- *
- * Examples:
- *
- * Generate HTML markup of D code.
- * ---
- * import std.stdio;
- * import std.array;
- * import std.file;
- * import std.d.lexer;
- *
- * void writeSpan(string cssClass, string value)
- * {
- * 	stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
- * }
- *
- * void highlight(R)(R tokens)
- * {
- * 	stdout.writeln(q"[<!DOCTYPE html>
- * <html>
- * <head>
- * <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
- * <body>
- * <style type="text/css">
- * html { background-color: #fff; color: #222; }
- * .kwrd { font-weight: bold; color: blue; }
- * .com { color: green; font-style: italic;}
- * .num { color: orangered; font-weigth: bold; }
- * .str { color: red; font-style: italic; }
- * .op { color: 333; font-weight: bold; }
- * .type { color: magenta; font-weight: bold; }
- * </style>
- * <pre>]");
- *
- * 	foreach (Token t; tokens)
- * 	{
- * 		if (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END)
- * 			writeSpan("type", t.value);
- * 		else if (t.type > TokenType.KEYWORDS_BEGIN && t.type < TokenType.KEYWORDS_END)
- * 			writeSpan("kwrd", t.value);
- * 		else if (t.type == TokenType.Comment)
- * 			writeSpan("com", t.value);
- * 		else if (t.type > TokenType.STRINGS_BEGIN && t.type < TokenType.STRINGS_END)
- * 			writeSpan("str", t.value);
- * 		else if (t.type > TokenType.NUMBERS_BEGIN && t.type < TokenType.NUMBERS_END)
- * 			writeSpan("num", t.value);
- * 		else if (t.type > TokenType.OPERATORS_BEGIN && t.type < TokenType.OPERATORS_END)
- * 			writeSpan("op", t.value);
- * 		else
- * 			stdout.write(t.value.replace("<", "&lt;"));
- * 	}
- * 	stdout.writeln("</pre>\n</body></html>");
- * }
- *
- * void main(string[] args)
- * {
- *     args[1].readText().byToken(IterationStyle.Everything, StringStyle.Source).highlight();
- * }
- * ---
- *
- * Copyright: Brian Schott 2013
- * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
- * Authors: Brian Schott
- * Source: $(PHOBOSSRC std/d/_lexer.d)
- */
+* This module contains a range-based lexer for the D programming language.
+*
+* Examples:
+*
+* Generate HTML markup of D code.
+* ---
+* import std.stdio;
+* import std.array;
+* import std.file;
+* import std.d.lexer;
+*
+* void writeSpan(string cssClass, string value)
+* {
+* 	stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
+* }
+*
+* void highlight(R)(R tokens)
+* {
+* 	stdout.writeln(q"[<!DOCTYPE html>
+* <html>
+* <head>
+* <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
+* <body>
+* <style type="text/css">
+* html { background-color: #fff; color: #222; }
+* .kwrd { font-weight: bold; color: blue; }
+* .com { color: green; font-style: italic;}
+* .num { color: orangered; font-weight: bold; }
+* .str { color: red; font-style: italic; }
+* .op { color: #333; font-weight: bold; }
+* .type { color: magenta; font-weight: bold; }
+* </style>
+* <pre>]");
+*
+* 	foreach (Token t; tokens)
+* 	{
+* 		if (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END)
+* 			writeSpan("type", t.value);
+* 		else if (t.type > TokenType.KEYWORDS_BEGIN && t.type < TokenType.KEYWORDS_END)
+* 			writeSpan("kwrd", t.value);
+* 		else if (t.type == TokenType.Comment)
+* 			writeSpan("com", t.value);
+* 		else if (t.type > TokenType.STRINGS_BEGIN && t.type < TokenType.STRINGS_END)
+* 			writeSpan("str", t.value);
+* 		else if (t.type > TokenType.NUMBERS_BEGIN && t.type < TokenType.NUMBERS_END)
+* 			writeSpan("num", t.value);
+* 		else if (t.type > TokenType.OPERATORS_BEGIN && t.type < TokenType.OPERATORS_END)
+* 			writeSpan("op", t.value);
+* 		else
+* 			stdout.write(t.value.replace("<", "&lt;"));
+* 	}
+* 	stdout.writeln("</pre>\n</body></html>");
+* }
+*
+* void main(string[] args)
+* {
+*     args[1].readText().byToken(IterationStyle.Everything, StringStyle.Source).highlight();
+* }
+* ---
+*
+* Copyright: Brian Schott 2013
+* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
+* Authors: Brian Schott
+* Source: $(PHOBOSSRC std/d/_lexer.d)
+*/

 module std.d.lexer;

@ -81,8 +81,8 @@ import std.d.entities;
 public:

 /**
- * Represents a D token
- */
+* Represents a D token
+*/
 struct Token
 {
 	/// The token type.
@ -129,8 +129,8 @@ struct Token
 }

 /**
- * Configure the behavior of the byToken() function
- */
+* Configure the behavior of the byToken() function
+*/
 enum IterationStyle
 {
 	/// Only include code, not whitespace or comments
@ -146,8 +146,8 @@ enum IterationStyle
 }

 /**
- * Configuration of the string lexing style
- */
+* Configuration of the string lexing style
+*/
 enum StringStyle : uint
 {
 	/**
@ -181,16 +181,17 @@ enum StringStyle : uint
 }

 /**
- * Iterate over the given range of characters by D tokens.
- * Params:
- *     range = the range of characters
- *     iterationStyle = See IterationStyle
- *     stringStyle = see StringStyle
- * Returns:
- *     an input range of tokens
- */
+* Iterate over the given range of characters by D tokens.
+* Params:
+*     range = the range of characters
+*     iterationStyle = See IterationStyle
+*     stringStyle = see StringStyle
+* Returns:
+*     an input range of tokens
+*/
 TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly,
-	const StringStyle stringStyle = StringStyle.Default) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
+	const StringStyle stringStyle = StringStyle.Default)
+	if (isForwardRange!(R) && (isSomeChar!(ElementType!(R)) || is (ElementType!(R) == ubyte)))
 {
 	auto r = new TokenRange!(R)(range);
 	r.stringStyle = stringStyle;
@ -201,9 +202,9 @@ TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = Iterati
 }

 /**
- * Range of tokens. Avoid creating instances of this manually. Use
- * $(DDOC_PSYMBOL byToken$(LPAREN)$(RPAREN)) instead, as it does some initialization work.
- */
+* Range of tokens. Avoid creating instances of this manually. Use
+* $(DDOC_PSYMBOL byToken$(LPAREN)$(RPAREN)) instead, as it does some initialization work.
+*/
 class TokenRange(R) : InputRange!(Token)
 {
 	this(ref R range)
@ -504,31 +505,31 @@ unittest
 }

 /**
- * Listing of all the tokens in the D language.
- *
- * Token types are arranged so that it is easy to group tokens while iterating
- * over them. For example:
- * ---
- * assert(TokenType.Increment < TokenType.OPERATORS_END);
- * assert(TokenType.Increment > TokenType.OPERATORS_BEGIN);
- * ---
- * The non-token values are documented below:
- *
- * $(BOOKTABLE ,
- *     $(TR $(TH Begin) $(TH End) $(TH Content) $(TH Examples))
- *     $(TR $(TD OPERATORS_BEGIN) $(TD OPERATORS_END) $(TD operatiors) $(TD +, -, <<=))
- *     $(TR $(TD TYPES_BEGIN) $(TD TYPES_END) $(TD types) $(TD bool, char, double))
- *     $(TR $(TD KEYWORDS_BEGIN) $(TD KEYWORDS) $(TD keywords) $(TD class, if, assert))
- *     $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD attributes) $(TD override synchronized, __gshared))
- *     $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD protection) $(TD public, protected))
- *     $(TR $(TD CONSTANTS_BEGIN) $(TD CONSTANTS_END) $(TD compile-time constants) $(TD __FILE__, __TIME__))
- *     $(TR $(TD LITERALS_BEGIN) $(TD LITERALS_END) $(TD string and numeric literals) $(TD "str", 123))
- *     $(TR $(TD NUMBERS_BEGIN) $(TD NUMBERS_END) $(TD numeric literals) $(TD 0x123p+9, 0b0110))
- *     $(TR $(TD STRINGS_BEGIN) $(TD STRINGS_END) $(TD string literals) $(TD `123`c, q{tokens;}, "abcde"))
- *     $(TR $(TD MISC_BEGIN) $(TD MISC_END) $(TD anything else) $(TD whitespace, comments, identifiers))
- * )
- * Note that several of the above ranges overlap.
- */
+* Listing of all the tokens in the D language.
+*
+* Token types are arranged so that it is easy to group tokens while iterating
+* over them. For example:
+* ---
+* assert(TokenType.Increment < TokenType.OPERATORS_END);
+* assert(TokenType.Increment > TokenType.OPERATORS_BEGIN);
+* ---
+* The non-token values are documented below:
+*
+* $(BOOKTABLE ,
+*     $(TR $(TH Begin) $(TH End) $(TH Content) $(TH Examples))
+*     $(TR $(TD OPERATORS_BEGIN) $(TD OPERATORS_END) $(TD operators) $(TD +, -, <<=))
+*     $(TR $(TD TYPES_BEGIN) $(TD TYPES_END) $(TD types) $(TD bool, char, double))
+*     $(TR $(TD KEYWORDS_BEGIN) $(TD KEYWORDS_END) $(TD keywords) $(TD class, if, assert))
+*     $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD attributes) $(TD override, synchronized, __gshared))
+*     $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD protection) $(TD public, protected))
+*     $(TR $(TD CONSTANTS_BEGIN) $(TD CONSTANTS_END) $(TD compile-time constants) $(TD __FILE__, __TIME__))
+*     $(TR $(TD LITERALS_BEGIN) $(TD LITERALS_END) $(TD string and numeric literals) $(TD "str", 123))
+*     $(TR $(TD NUMBERS_BEGIN) $(TD NUMBERS_END) $(TD numeric literals) $(TD 0x123p+9, 0b0110))
+*     $(TR $(TD STRINGS_BEGIN) $(TD STRINGS_END) $(TD string literals) $(TD `123`c, q{tokens;}, "abcde"))
+*     $(TR $(TD MISC_BEGIN) $(TD MISC_END) $(TD anything else) $(TD whitespace, comments, identifiers))
+* )
+* Note that several of the above ranges overlap.
+*/
 enum TokenType: uint
 {
 	// Operators