Merge branch 'range-based-lexer' of https://github.com/Hackerpilot/Dscanner into range-based-lexer

This commit is contained in:
Hackerpilot 2013-01-27 14:22:11 +00:00
commit 1055a47087
7 changed files with 100 additions and 112 deletions

View File

@ -1,2 +1,2 @@
dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline #dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest

View File

@ -14,28 +14,28 @@ class CircularBuffer(T) : InputRange!(T)
{ {
public: public:
this (size_t size, InputRange!(T) range) this (size_t size, InputRange!(T) range)
{ {
this.range = range; this.range = range;
this.margin = size; this.margin = size;
data = new T[(margin * 2) + 1]; data = new T[(margin * 2) + 1];
if (range.empty()) if (range.empty())
{ {
_empty = true; _empty = true;
return; return;
} }
for (size_t i = 0; i <= margin && !this.range.empty(); ++i) for (size_t i = 0; i <= margin && !this.range.empty(); ++i)
{ {
data[i] = this.range.front(); data[i] = this.range.front();
this.range.popFront(); this.range.popFront();
end++; end++;
} }
} }
override T front() const @property override T front() @property
{ {
return data[index]; return data[index];
} }
T peek(int offset = 1) T peek(int offset = 1)
in in
@ -52,32 +52,32 @@ public:
return abs(offset) <= margin && sourceIndex + offset >= 0; return abs(offset) <= margin && sourceIndex + offset >= 0;
} }
override void popFront() override void popFront()
in in
{ {
assert (!_empty); assert (!_empty);
} }
body body
{ {
index = (index + 1) % data.length; index = (index + 1) % data.length;
++sourceIndex; ++sourceIndex;
if (range.empty()) if (range.empty())
{ {
if (index == end) if (index == end)
_empty = true; _empty = true;
} }
else else
{ {
data[end] = range.front(); data[end] = range.front();
end = (end + 1) % data.length; end = (end + 1) % data.length;
range.popFront(); range.popFront();
} }
} }
bool empty() const @property bool empty() const @property
{ {
return _empty; return _empty;
} }
override T moveFront() override T moveFront()
{ {
@ -94,6 +94,7 @@ public:
result = dg(front); result = dg(front);
if (result) if (result)
break; break;
popFront();
} }
return result; return result;
} }
@ -107,18 +108,19 @@ public:
result = dg(i, front); result = dg(i, front);
if (result) if (result)
break; break;
popFront();
} }
return result; return result;
} }
private: private:
InputRange!(T) range; InputRange!(T) range;
immutable size_t margin; immutable size_t margin;
T[] data; T[] data;
size_t sourceIndex; size_t sourceIndex;
size_t end; size_t end;
size_t index; size_t index;
bool _empty; bool _empty;
} }
unittest unittest
@ -131,10 +133,10 @@ unittest
unittest unittest
{ {
int[] arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; int[] arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
auto buf = CircularBuffer!(int, int[])(2, arr); auto buf = CircularBuffer!(int, int[])(2, arr);
assert (buf.data.length == 5); assert (buf.data.length == 5);
auto iterated = array(buf); auto iterated = array(buf);
assert (iterated == arr); assert (iterated == arr);
} }

View File

@ -10,8 +10,6 @@ import std.stdio;
import std.array; import std.array;
import std.d.lexer; import std.d.lexer;
import langutils;
void writeSpan(string cssClass, string value) void writeSpan(string cssClass, string value)
{ {
stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`); stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);

View File

@ -6,6 +6,7 @@
module langutils; module langutils;
import std.array; import std.array;
import std.algorithm;
import std.d.lexer; import std.d.lexer;
@ -43,7 +44,7 @@ string combineTokens(ref const Token[] tokens)
return app.data; return app.data;
} }
pure string getTypeFromToken(const Token t) pure nothrow string getTypeFromToken(const Token t)
{ {
switch (t.type) switch (t.type)
{ {
@ -73,8 +74,14 @@ pure string getTypeFromToken(const Token t)
} }
} }
/**
 * Checks whether a token is an identifier or a type token.
 *
 * A token counts as a type token when its type lies strictly between the
 * TokenType.TYPES_BEGIN and TokenType.TYPES_END markers.
 *
 * Params:
 *     t = the token to classify
 * Returns: true if t is an identifier or falls inside the type-token range
 */
pure bool isIdentifierOrType(const Token t)
{
    // Both bounds have to be compared against t.type. A bare
    // TokenType.TYPES_END is only tested for truthiness, which leaves the
    // upper end of the range unchecked.
    return t.type == TokenType.Identifier
        || (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END);
}
/// Reports whether the token's text begins with one of the three
/// documentation-comment openers: a triple slash, a slash followed by two
/// asterisks, or a slash followed by two plus signs.
pure bool isDocComment(ref const Token t)
{
    // Try each opener in the same order as before, leaving on the first match.
    if (t.value.startsWith("///"))
        return true;
    if (t.value.startsWith("/**"))
        return true;
    return t.value.startsWith("/++");
}

47
main.d
View File

@ -15,6 +15,7 @@ import std.parallelism;
import std.path; import std.path;
import std.regex; import std.regex;
import std.stdio; import std.stdio;
import std.range;
import std.d.lexer; import std.d.lexer;
import autocomplete; import autocomplete;
@ -28,7 +29,7 @@ import circularbuffer;
immutable size_t CIRC_BUFF_SIZE = 4; immutable size_t CIRC_BUFF_SIZE = 4;
pure bool isLineOfCode(TokenType t) pure nothrow bool isLineOfCode(TokenType t)
{ {
switch(t) switch(t)
{ {
@ -138,36 +139,21 @@ int main(string[] args)
{ {
if (args.length == 1) if (args.length == 1)
{ {
auto f = appender!string(); writeln(stdin.byLine(KeepTerminator.yes).join().byToken().count!(a => isLineOfCode(a.type))());
char[] buf;
while (stdin.readln(buf))
f.put(buf);
writeln(f.data.byToken().count!(a => isLineOfCode(a.type))());
} }
else else
{ {
writeln(args[1..$].map!(a => a.readText().byToken())().joiner() writeln(args[1..$].map!(a => File(a).byLine(KeepTerminator.yes).join().byToken(a))()
.count!(a => isLineOfCode(a.type))()); .joiner().count!(a => isLineOfCode(a.type))());
} }
return 0; return 0;
} }
if (highlight) if (highlight)
{ {
if (args.length == 1) File f = args.length == 1 ? stdin : File(args[1]);
{ highlighter.highlight(f.byLine(KeepTerminator.yes).join().byToken(
auto f = appender!string(); "", IterationStyle.Everything, TokenStyle.Source));
char[] buf;
while (stdin.readln(buf))
f.put(buf);
highlighter.highlight(f.data.byToken("stdin", IterationStyle.Everything,
TokenStyle.Source));
}
else
{
highlighter.highlight(args[1].readText().byToken(args[1],
IterationStyle.Everything, TokenStyle.Source));
}
return 0; return 0;
} }
@ -213,20 +199,9 @@ int main(string[] args)
if (json) if (json)
{ {
CircularBuffer!(Token) tokens; CircularBuffer!(Token) tokens;
if (args.length == 1) File f = args.length == 1 ? stdin : File(args[1]);
{ tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE,
// Read from stdin f.byLine(KeepTerminator.yes).join().byToken!(char[])());
auto f = appender!string();
char[] buf;
while (stdin.readln(buf))
f.put(buf);
tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(f.data));
}
else
{
// read given file
tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(readText(args[1])));
}
auto mod = parseModule(tokens); auto mod = parseModule(tokens);
mod.writeJSONTo(stdout); mod.writeJSONTo(stdout);
return 0; return 0;

View File

@ -26,6 +26,7 @@ public:
this(InputRange!Token tokens, TokenType open, TokenType close) this(InputRange!Token tokens, TokenType open, TokenType close)
{ {
super(0, tokens); super(0, tokens);
this.range = tokens;
this.open = open; this.open = open;
this.close = close; this.close = close;
} }
@ -35,7 +36,7 @@ public:
return _empty; return _empty;
} }
override Token front() const @property override Token front() @property
{ {
return range.front; return range.front;
} }
@ -47,14 +48,20 @@ public:
++depth; ++depth;
else if (range.front == close) else if (range.front == close)
--depth; --depth;
_empty = depth == 0; _empty = depth == 0 || range.empty;
} }
// Sanity checks on this object's state: the wrapped token range must be set,
// and the open/close nesting depth must never drop below zero.
invariant()
{
assert (range);
assert (depth >= 0);
}
private: private:
int depth; int depth;
TokenType open; TokenType open;
TokenType close; TokenType close;
TokenBuffer range; InputRange!(Token) range;
bool _empty; bool _empty;
} }

View File

@ -105,8 +105,8 @@ import std.d.entities;
public: public:
/** /**
* Represents a D token * Represents a D token
*/ */
struct Token struct Token
{ {
/// The token type. /// The token type.
@ -122,28 +122,28 @@ struct Token
uint startIndex; uint startIndex;
/** /**
* Check to see if the token is of the same type and has the same string * Check to see if the token is of the same type and has the same string
* representation as the given token. * representation as the given token.
*/ */
bool opEquals(ref const(Token) other) const bool opEquals(ref const(Token) other) const
{ {
return other.type == type && other.value == value; return other.type == type && other.value == value;
} }
/** /**
* Checks to see if the token's string representation is equal to the given * Checks to see if the token's string representation is equal to the given
* string. * string.
*/ */
bool opEquals(string value) const { return this.value == value; } bool opEquals(string value) const { return this.value == value; }
/**
 * Checks to see if the token is of the given type.
 *
 * Params:
 *     type = the token type to compare against
 * Returns: true if this token's type equals the given type
 */
bool opEquals(TokenType type) const
{
    // The parameter shadows the member of the same name, so the member must
    // be qualified with `this`. An unqualified `type == type` compares the
    // argument with itself and is always true.
    return this.type == type;
}
/** /**
* Comparison operator orders tokens by start index. * Comparison operator orders tokens by start index.
*/ */
int opCmp(size_t i) const int opCmp(size_t i) const
{ {
if (startIndex < i) return -1; if (startIndex < i) return -1;
@ -164,16 +164,16 @@ enum IterationStyle
IncludeComments = 0b0001, IncludeComments = 0b0001,
/// Includes whitespace /// Includes whitespace
IncludeWhitespace = 0b0010, IncludeWhitespace = 0b0010,
/// Include $(LINK2 http://dlang.org/lex.html#Special%20Tokens%20Sequence, special token sequences) /// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens)
IncludeSpecialTokens = 0b0100, IncludeSpecialTokens = 0b0100,
/// Do not terminate iteration upon reaching the __EOF__ token /// Do not stop iteration on reaching the __EOF__ token
IgnoreEOF = 0b1000, IgnoreEOF = 0b1000,
/// Include everything, including the __EOF__ token. /// Include everything
Everything = IncludeComments | IncludeWhitespace | IgnoreEOF Everything = IncludeComments | IncludeWhitespace | IgnoreEOF
} }
/** /**
* Configuration of the string lexing style. These flags may be combined with a * Configuration of the token lexing style. These flags may be combined with a
* bitwise or. * bitwise or.
*/ */
enum TokenStyle : uint enum TokenStyle : uint
@ -187,10 +187,10 @@ enum TokenStyle : uint
Default = 0b0000, Default = 0b0000,
/** /**
* Escape sequences will not be processed. An escaped quote character will * Escape sequences will not be processed. An escaped quote character will
* not terminate string lexing, but it will not be replaced with the quote * not terminate string lexing, but it will not be replaced with the quote
* character in the token. * character in the token.
*/ */
NotEscaped = 0b0001, NotEscaped = 0b0001,
/** /**
@ -259,8 +259,8 @@ struct TokenRange(R) if (isForwardRange!(R) && is(ElementType!(R) == char))
} }
/** /**
* Returns: the current token * Returns: the current token
*/ */
override Token front() const @property override Token front() const @property
{ {
enforce(!_empty, "Cannot call front() on empty token range"); enforce(!_empty, "Cannot call front() on empty token range");
@ -268,8 +268,8 @@ struct TokenRange(R) if (isForwardRange!(R) && is(ElementType!(R) == char))
} }
/** /**
* Returns the current token and then removes it from the range * Returns the current token and then removes it from the range
*/ */
override Token moveFront() override Token moveFront()
{ {
auto r = front(); auto r = front();
@ -2486,7 +2486,6 @@ pure nothrow TokenType lookupTokenType(const string input)
default: break; default: break;
} }
break; break;
case 6: case 6:
switch (input) switch (input)
{ {
@ -2595,8 +2594,8 @@ pure nothrow TokenType lookupTokenType(const string input)
class Trie(K, V) if (isInputRange!K): TrieNode!(K, V) class Trie(K, V) if (isInputRange!K): TrieNode!(K, V)
{ {
/** /**
* Adds the given value to the trie with the given key * Adds the given value to the trie with the given key
*/ */
void add(K key, V value) pure void add(K key, V value) pure
{ {
TrieNode!(K,V) current = this; TrieNode!(K,V) current = this;