Merge branch 'range-based-lexer' of https://github.com/Hackerpilot/Dscanner into range-based-lexer

2013-01-27 14:22:11 +00:00 · 2013-01-27 14:22:11 +00:00 · 1055a47087
parent e7555b2f05 31ffb3f470
commit 1055a47087
7 changed files with 100 additions and 112 deletions
--- a/build.sh
+++ b/build.sh
@@ -1,2 +1,2 @@
-dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
+#dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
-#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest
+dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest
--- a/circularbuffer.d
+++ b/circularbuffer.d
@@ -14,28 +14,28 @@ class CircularBuffer(T) : InputRange!(T)
 {
 public:
-    this (size_t size, InputRange!(T) range)
+	this (size_t size, InputRange!(T) range)
-    {
+	{
-        this.range = range;
+		this.range = range;
-        this.margin = size;
+		this.margin = size;
-        data = new T[(margin * 2) + 1];
+		data = new T[(margin * 2) + 1];
-        if (range.empty())
+		if (range.empty())
-        {
+		{
-            _empty = true;
+			_empty = true;
-            return;
+			return;
-        }
+		}
-        for (size_t i = 0; i <= margin && !this.range.empty(); ++i)
+		for (size_t i = 0; i <= margin && !this.range.empty(); ++i)
-        {
+		{
-            data[i] = this.range.front();
+			data[i] = this.range.front();
-            this.range.popFront();
+			this.range.popFront();
 			end++;
-        }
+		}
-    }
+	}
-    override T front() const @property
+	override T front() @property
-    {
+	{
-        return data[index];
+		return data[index];
-    }
+	}
 	T peek(int offset = 1)
 	in
@@ -52,32 +52,32 @@ public:
 		return abs(offset) <= margin && sourceIndex + offset >= 0;
 	}
-    override void popFront()
+	override void popFront()
 	in
 	{
 		assert (!_empty);
 	}
 	body
-    {
+	{
 		index = (index + 1) % data.length;
 		++sourceIndex;
-        if (range.empty())
+		if (range.empty())
 		{
 			if (index == end)
 				_empty = true;
 		}
-        else
+		else
-        {
+		{
 			data[end] = range.front();
 			end = (end + 1) % data.length;
 			range.popFront();
-        }
+		}
-    }
+	}
-    bool empty() const @property
+	bool empty() const @property
-    {
+	{
-        return _empty;
+		return _empty;
-    }
+	}
 	override T moveFront()
 	{
@@ -94,6 +94,7 @@ public:
 			result = dg(front);
 			if (result)
 				break;
+           popFront();
 		}
 		return result;
 	}
@@ -107,18 +108,19 @@ public:
 			result = dg(i, front);
 			if (result)
 				break;
+           popFront();
 		}
 		return result;
 	}
 private:
-    InputRange!(T) range;
+	InputRange!(T) range;
-    immutable size_t margin;
+	immutable size_t margin;
-    T[] data;
+	T[] data;
 	size_t sourceIndex;
 	size_t end;
-    size_t index;
+	size_t index;
-    bool _empty;
+	bool _empty;
 }
 unittest
@@ -131,10 +133,10 @@ unittest
 unittest
 {
-    int[] arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+	int[] arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
-    auto buf = CircularBuffer!(int, int[])(2, arr);
+	auto buf = CircularBuffer!(int, int[])(2, arr);
-    assert (buf.data.length == 5);
+	assert (buf.data.length == 5);
-    auto iterated = array(buf);
+	auto iterated = array(buf);
 	assert (iterated == arr);
 }
--- a/highlighter.d
+++ b/highlighter.d
@@ -10,8 +10,6 @@ import std.stdio;
 import std.array;
 import std.d.lexer;
 import langutils;
 void writeSpan(string cssClass, string value)
 {
 	stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
--- a/langutils.d
+++ b/langutils.d
@@ -6,6 +6,7 @@
 module langutils;
 import std.array;
 import std.algorithm;
 import std.d.lexer;
@@ -43,7 +44,7 @@ string combineTokens(ref const Token[] tokens)
 	return app.data;
 }
-pure string getTypeFromToken(const Token t)
+pure nothrow string getTypeFromToken(const Token t)
 {
 	switch (t.type)
 	{
@@ -73,8 +74,14 @@ pure string getTypeFromToken(const Token t)
 	}
 }
-pure bool isIdentifierOrType(inout Token t)
+pure bool isIdentifierOrType(const Token t)
 {
 	return t.type == TokenType.Identifier || (t.type > TokenType.TYPES_BEGIN
 		&& TokenType.TYPES_END);
 }
+pure bool isDocComment(ref const Token t)
+{
+   return t.value.startsWith("///") || t.value.startsWith("/**")
+       || t.value.startsWith("/++");
+}
--- a/main.d
+++ b/main.d
@@ -15,6 +15,7 @@ import std.parallelism;
 import std.path;
 import std.regex;
 import std.stdio;
 import std.range;
 import std.d.lexer;
 import autocomplete;
@@ -28,7 +29,7 @@ import circularbuffer;
 immutable size_t CIRC_BUFF_SIZE = 4;
-pure bool isLineOfCode(TokenType t)
+pure nothrow bool isLineOfCode(TokenType t)
 {
 	switch(t)
 	{
@@ -138,36 +139,21 @@ int main(string[] args)
 	{
 		if (args.length == 1)
 		{
-			auto f = appender!string();
+			writeln(stdin.byLine(KeepTerminator.yes).join().byToken().count!(a => isLineOfCode(a.type))());
-			char[] buf;
-			while (stdin.readln(buf))
-				f.put(buf);
-			writeln(f.data.byToken().count!(a => isLineOfCode(a.type))());
 		}
 		else
 		{
-			writeln(args[1..$].map!(a => a.readText().byToken())().joiner()
+			writeln(args[1..$].map!(a => File(a).byLine(KeepTerminator.yes).join().byToken(a))()
-				.count!(a => isLineOfCode(a.type))());
+                .joiner().count!(a => isLineOfCode(a.type))());
 		}
 		return 0;
 	}
 	if (highlight)
 	{
-		if (args.length == 1)
+        File f = args.length == 1 ? stdin : File(args[1]);
-		{
+        highlighter.highlight(f.byLine(KeepTerminator.yes).join().byToken(
-			auto f = appender!string();
+            "", IterationStyle.Everything, TokenStyle.Source));
-			char[] buf;
-			while (stdin.readln(buf))
-				f.put(buf);
-			highlighter.highlight(f.data.byToken("stdin", IterationStyle.Everything,
-				TokenStyle.Source));
-		}
-		else
-		{
-			highlighter.highlight(args[1].readText().byToken(args[1],
-				IterationStyle.Everything, TokenStyle.Source));
-		}
 		return 0;
 	}
@@ -213,20 +199,9 @@ int main(string[] args)
 	if (json)
 	{
 		CircularBuffer!(Token) tokens;
-		if (args.length == 1)
+        File f = args.length == 1 ? stdin : File(args[1]);
-		{
+        tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE,
-			// Read from stdin
+            f.byLine(KeepTerminator.yes).join().byToken!(char[])());
-			auto f = appender!string();
-			char[] buf;
-			while (stdin.readln(buf))
-				f.put(buf);
-			tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(f.data));
-		}
-		else
-		{
-			// read given file
-			tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(readText(args[1])));
-		}
 		auto mod = parseModule(tokens);
 		mod.writeJSONTo(stdout);
 		return 0;
--- a/parser.d
+++ b/parser.d
@@ -26,6 +26,7 @@ public:
 	this(InputRange!Token tokens, TokenType open, TokenType close)
 	{
 		super(0, tokens);
+       this.range = tokens;
 		this.open = open;
 		this.close = close;
 	}
@@ -35,7 +36,7 @@ public:
 		return _empty;
 	}
-	override Token front() const @property
+	override Token front() @property
 	{
 		return range.front;
 	}
@@ -47,14 +48,20 @@ public:
 			++depth;
 		else if (range.front == close)
 			--depth;
-		_empty = depth == 0;
+		_empty = depth == 0 || range.empty;
 	}
+   invariant()
+   {
+       assert (range);
+       assert (depth >= 0);
+   }
 private:
 	int depth;
 	TokenType open;
 	TokenType close;
-	TokenBuffer range;
+	InputRange!(Token) range;
 	bool _empty;
 }
--- a/std/d/lexer.d
+++ b/std/d/lexer.d
@@ -105,8 +105,8 @@ import std.d.entities;
 public:
 /**
- * Represents a D token
+* Represents a D token
- */
+*/
 struct Token
 {
 	/// The token type.
@@ -122,28 +122,28 @@ struct Token
 	uint startIndex;
 	/**
-	 * Check to see if the token is of the same type and has the same string
+	* Check to see if the token is of the same type and has the same string
-	 * representation as the given token.
+	* representation as the given token.
-	 */
+	*/
 	bool opEquals(ref const(Token) other) const
 	{
 		return other.type == type && other.value == value;
 	}
 	/**
-	 * Checks to see if the token's string representation is equal to the given
+	* Checks to see if the token's string representation is equal to the given
-	 * string.
+	* string.
-	 */
+	*/
 	bool opEquals(string value) const { return this.value == value; }
 	/**
-	 * Checks to see if the token is of the given type.
+	* Checks to see if the token is of the given type.
-	 */
+	*/
 	bool opEquals(TokenType type) const { return type == type; }
 	/**
-	 * Comparison operator orders tokens by start index.
+	* Comparison operator orders tokens by start index.
-	 */
+	*/
 	int opCmp(size_t i) const
 	{
 		if (startIndex < i) return -1;
@@ -164,16 +164,16 @@ enum IterationStyle
 	IncludeComments = 0b0001,
 	/// Includes whitespace
 	IncludeWhitespace = 0b0010,
-	/// Include $(LINK2 http://dlang.org/lex.html#Special%20Tokens%20Sequence, special token sequences)
+	/// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens)
 	IncludeSpecialTokens = 0b0100,
-	/// Do not terminate iteration upon reaching the ___EOF__ token
+	/// Do not stop iteration on reaching the ___EOF__ token
 	IgnoreEOF = 0b1000,
-	/// Include everything, including the __EOF__ token.
+	/// Include everything
 	Everything = IncludeComments | IncludeWhitespace | IgnoreEOF
 }
 /**
- * Configuration of the string lexing style. These flags may be combined with a
+ * Configuration of the token lexing style. These flags may be combined with a
 * bitwise or.
 */
 enum TokenStyle : uint
@@ -187,10 +187,10 @@ enum TokenStyle : uint
 	Default = 0b0000,
 	/**
-	 * Escape sequences will not be processed. An escaped quote character will
+	* Escape sequences will not be processed. An escaped quote character will
-	 * not terminate string lexing, but it will not be replaced with the quote
+	* not terminate string lexing, but it will not be replaced with the quote
-	 * character in the token.
+	* character in the token.
-	 */
+	*/
 	NotEscaped = 0b0001,
 	/**
@@ -259,8 +259,8 @@ struct TokenRange(R) if (isForwardRange!(R) && is(ElementType!(R) == char))
 	}
 	/**
-	 * Returns: the current token
+	* Returns: the current token
-	 */
+	*/
 	override Token front() const @property
 	{
 		enforce(!_empty, "Cannot call front() on empty token range");
@@ -268,8 +268,8 @@ struct TokenRange(R) if (isForwardRange!(R) && is(ElementType!(R) == char))
 	}
 	/**
-	 * Returns the current token and then removes it from the range
+	* Returns the current token and then removes it from the range
-	 */
+	*/
 	override Token moveFront()
 	{
 		auto r = front();
@@ -2486,7 +2486,6 @@ pure nothrow TokenType lookupTokenType(const string input)
 		default: break;
 		}
 		break;
 	case 6:
 		switch (input)
 		{
@@ -2595,8 +2594,8 @@ pure nothrow TokenType lookupTokenType(const string input)
 class Trie(K, V) if (isInputRange!K): TrieNode!(K, V)
 {
 	/**
-	 * Adds the given value to the trie with the given key
+	* Adds the given value to the trie with the given key
-	 */
+	*/
 	void add(K key, V value) pure
 	{
 		TrieNode!(K,V) current = this;
`@@ -1,2 +1,2 @@`
	`dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline`	`#dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline`
	`#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest`	`dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest`