D-Scanner/parser.d
Hackerpilot bbd2ec13ea Added foreach_reverse to list of tokens used for line-of-code count
Alphabetized constants
Cleaned up constant names
Fixed defects with decimal parsing
Eliminated iteration style parameter to lexWhitespace, as it didn't really speed things up.
Added support for imaginary literals
2012-04-27 02:26:34 -07:00

824 lines
19 KiB
D

// Copyright Brian Schott (Sir Alaran) 2012.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module parser;
import std.stream;
import std.array;
import std.stdio;
import std.algorithm;
import types, tokenizer;
import langutils;
/**
 * Extracts the tokens enclosed by a balanced pair of delimiters.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens such that tokens[index].type == open. On
 *         return it refers to the token directly after the matching closing
 *         delimiter, or equals tokens.length if no matching delimiter exists.
 *     open = the opening delimiter
 *     close = the closing delimiter
 * Returns: all tokens that are between the balanced delimiters that start at
 *     tokens[index], not including the delimiters. If the delimiters in tokens
 *     are not balanced, this function will return tokens[index + 1 .. $];
 */
const(Token)[] betweenBalanced(const Token[] tokens, ref size_t index, TokenType open,
    TokenType close)
in
{
    assert (tokens[index] == open);
}
body
{
    ++index;
    size_t start = index;
    int depth = 1;
    while (depth > 0 && index < tokens.length)
    {
        if (tokens[index] == open) ++depth;
        else if (tokens[index] == close) --depth;
        ++index;
    }
    // Only exclude the last visited token when it really is the matching
    // closing delimiter. When the input ran out first, everything after the
    // opening delimiter is returned; this also keeps the slice valid when the
    // opening delimiter is the very last token.
    immutable size_t end = depth == 0 ? index - 1 : index;
    return tokens[start .. end];
}
/**
 * Shorthand for betweenBalanced using TokenType.LBrace and TokenType.RBrace
 * as the delimiter pair.
 * See_also: betweenBalanced
 */
const(Token)[] betweenBalancedBraces(const Token[] tokens, ref size_t index)
{
    return tokens.betweenBalanced(index, TokenType.LBrace, TokenType.RBrace);
}
/**
 * Shorthand for betweenBalanced using TokenType.LParen and TokenType.RParen
 * as the delimiter pair.
 * See_also: betweenBalanced
 */
const(Token)[] betweenBalancedParens(const Token[] tokens, ref size_t index)
{
    return tokens.betweenBalanced(index, TokenType.LParen, TokenType.RParen);
}
/**
 * Shorthand for betweenBalanced using TokenType.LBracket and
 * TokenType.RBracket as the delimiter pair.
 * See_also: betweenBalanced
 */
const(Token)[] betweenBalancedBrackets(const Token[] tokens, ref size_t index)
{
    return tokens.betweenBalanced(index, TokenType.LBracket, TokenType.RBracket);
}
/**
 * Moves index across a balanced run of openToken / closeToken.
 *
 * If tokens[index] is openToken the scan runs forwards and index ends up
 * directly after the balancing occurrence of closeToken (or at tokens.length
 * if there is none). For any other token the scan runs backwards: index ends
 * up directly before the balancing openToken, or at 0 if the scan reaches the
 * start of the array first (tokens[0] itself is never examined).
 *
 * NOTE: calling this with index == 0 on a token that is not openToken makes
 * the unsigned index wrap around.
 */
void skipBalanced(alias openToken, alias closeToken)(const Token[] tokens, ref size_t index)
{
    // +1 walks forwards from an opening token, -1 walks backwards otherwise.
    immutable int step = tokens[index] == openToken ? 1 : -1;
    int nesting = step;
    index += step;
    while (index < tokens.length && index > 0 && nesting != 0)
    {
        immutable current = tokens[index].type;
        if (current == openToken)
            ++nesting;
        else if (current == closeToken)
            --nesting;
        index += step;
    }
}
/**
 * skipBalanced instantiated for TokenType.LParen / TokenType.RParen.
 * See_also: skipBalanced
 */
void skipParens(const Token[] tokens, ref size_t index)
{
    tokens.skipBalanced!(TokenType.LParen, TokenType.RParen)(index);
}
/**
 * skipBalanced instantiated for TokenType.LBracket / TokenType.RBracket.
 * See_also: skipBalanced
 */
void skipBrackets(const Token[] tokens, ref size_t index)
{
    tokens.skipBalanced!(TokenType.LBracket, TokenType.RBracket)(index);
}
/**
 * skipBalanced instantiated for TokenType.LBrace / TokenType.RBrace.
 * See_also: skipBalanced
 */
void skipBraces(const Token[] tokens, ref size_t index)
{
    tokens.skipBalanced!(TokenType.LBrace, TokenType.RBrace)(index);
}
/**
 * Builds a textual representation of a delimited token sequence.
 * Params:
 *     tokens = the token array to examine
 *     index = an index into tokens such that tokens[index].type == open. On
 *         return it refers to the token directly after the matching closing
 *         delimiter, or equals tokens.length if there is none.
 *     open = the opening delimiter
 *     close = the closing delimiter
 * Returns: a string representing the contents of the two delimiters, without
 *     the outermost delimiters themselves. Nested occurrences of the
 *     delimiters are kept. This will not preserve whitespace, but it will
 *     place a single space character after a comma and between two adjacent
 *     identifiers.
 */
string content(const Token[] tokens, ref size_t index, TokenType open, TokenType close)
in
{
    assert (tokens[index] == open);
}
body
{
    index++;
    auto app = appender!string();
    int depth = 1;
    // Remembers whether the token handled in the previous iteration was an
    // identifier, so that "a b" is not glued together into "ab".
    bool previousWasIdentifier = false;
    while (depth > 0 && index < tokens.length)
    {
        immutable bool isIdentifier = tokens[index] == TokenType.Identifier;
        if (tokens[index] == open)
        {
            ++depth;
            // Nested delimiters are part of the content.
            app.put(tokens[index].value);
        }
        else if (tokens[index] == close)
        {
            --depth;
            // Everything except the outermost closing delimiter is content.
            if (depth > 0)
                app.put(tokens[index].value);
        }
        else if (tokens[index] == TokenType.Comma)
        {
            app.put(", ");
        }
        else
        {
            if (isIdentifier && previousWasIdentifier)
                app.put(" ");
            app.put(tokens[index].value);
        }
        previousWasIdentifier = isIdentifier;
        ++index;
    }
    return app.data;
}
/**
 * Returns the text produced by content for a parenthesized token sequence,
 * wrapped in "(" and ")".
 * See_also: content
 */
string parenContent(const Token[] tokens, ref size_t index)
{
    string inner = content(tokens, index, TokenType.LParen, TokenType.RParen);
    return "(" ~ inner ~ ")";
}
/**
 * Returns the text produced by content for a bracketed token sequence,
 * wrapped in "[" and "]".
 * See_also: content
 */
string bracketContent(const Token[] tokens, ref size_t index)
{
    string inner = content(tokens, index, TokenType.LBracket, TokenType.RBracket);
    return "[" ~ inner ~ "]";
}
/**
 * Advances index until it indexes a token in tokens after the balancing right
 * brace if index initially indexed a left brace, or advances index until it
 * indexes a token after the next semicolon otherwise. Does nothing if index
 * is already past the end of tokens.
 */
void skipBlockStatement(const Token[] tokens, ref size_t index)
{
    // Callers frequently advance index right before calling this function;
    // there is nothing to skip once the input is exhausted.
    if (index >= tokens.length)
        return;
    if (tokens[index] == TokenType.LBrace)
        betweenBalancedBraces(tokens, index);
    else
    {
        skipPastNext(tokens, TokenType.Semicolon, index);
    }
}
/**
 * Moves index forward until it refers to the token directly after the next
 * token whose type is type. Brace, parenthesis and bracket groups met on the
 * way are stepped over as a whole, so matching tokens nested inside them are
 * ignored. If no such token is found, index ends up at or past tokens.length.
 */
void skipPastNext(const Token[] tokens, TokenType type, ref size_t index)
{
    while (index < tokens.length)
    {
        immutable current = tokens[index].type;
        if (current == TokenType.LBrace)
        {
            betweenBalancedBraces(tokens, index);
            continue;
        }
        if (current == TokenType.LParen)
        {
            betweenBalancedParens(tokens, index);
            continue;
        }
        if (current == TokenType.LBracket)
        {
            betweenBalancedBrackets(tokens, index);
            continue;
        }
        // Step over the token in either case; stop if it was the target.
        ++index;
        if (current == type)
            return;
    }
}
/**
 * Builds the textual form of a type that starts at tokens[index].
 *
 * The token at index is taken as the base of the type. Any directly following
 * bracket groups, template instantiations introduced by TokenType.Not,
 * TokenType.Star tokens and TokenType.BitAnd tokens are appended to it.
 * Params:
 *     tokens = the token array to examine
 *     index = index of the first token of the type. On return it refers to
 *         the first token that is not part of the type.
 * Returns: the type as a string
 */
string parseTypeDeclaration(const Token[] tokens, ref size_t index)
{
    auto type = tokens[index++].value.idup;
    buildingType: while (index < tokens.length)
    {
        switch (tokens[index].type)
        {
        case TokenType.LBracket:
            type ~= bracketContent(tokens, index);
            break;
        case TokenType.Not:
            type ~= tokens[index++].value;
            // A '!' at the very end of the input has no argument to consume.
            if (index >= tokens.length)
                break buildingType;
            if (tokens[index] == TokenType.LParen)
                type ~= parenContent(tokens, index);
            else
                type ~= tokens[index++].value;
            break;
        case TokenType.Star:
        case TokenType.BitAnd:
            type ~= tokens[index++].value;
            break;
        default:
            break buildingType;
        }
    }
    return type;
}
/**
 * Parses a module from a token array.
 *
 * Walks the tokens once and collects imports, unions, classes, interfaces,
 * structs, enums, functions and variables into a Module. The contents of
 * version blocks and of extern/align/protection blocks are parsed recursively
 * and merged into the result. Template declarations, unittest blocks and
 * alias, mixin, assert and else statements are skipped.
 * Params:
 *     tokens = the tokens to parse
 *     protection = the default protection level for a block statement
 *     attributes = the default attributes for a block statement
 * Returns: the parsed module
 */
Module parseModule(const Token[] tokens, string protection = "public", string[] attributes = [])
{
    // State collected for the declaration that is currently being read. It is
    // cleared by resetLocals once the declaration has been recorded.
    string type;
    string name;
    string localProtection = "";
    string[] localAttributes = [];
    void resetLocals()
    {
        type = "";
        name = "";
        localProtection = "";
        localAttributes = [];
    }
    Module mod = new Module;
    size_t index = 0;
    // True if index still refers to a token and that token has type t. Used
    // wherever index has been advanced and may have run off the end.
    bool currentIs(TokenType t)
    {
        return index < tokens.length && tokens[index] == t;
    }
    while(index < tokens.length)
    {
        switch(tokens[index].type)
        {
        case TokenType.Else:
        case TokenType.Mixin:
        case TokenType.Assert:
            ++index;
            if (index < tokens.length)
                tokens.skipBlockStatement(index);
            break;
        case TokenType.Alias:
            tokens.skipBlockStatement(index);
            break;
        case TokenType.Import:
            mod.imports ~= parseImports(tokens, index);
            resetLocals();
            break;
        case TokenType.Version:
            ++index;
            if (currentIs(TokenType.LParen))
            {
                // The version condition itself is ignored; a braced body is
                // parsed unconditionally.
                tokens.betweenBalancedParens(index);
                if (currentIs(TokenType.LBrace))
                    mod.merge(parseModule(betweenBalancedBraces(tokens, index),
                        localProtection.empty() ? protection : localProtection,
                        attributes));
            }
            else if (currentIs(TokenType.Assign))
                tokens.skipBlockStatement(index);
            break;
        case TokenType.Deprecated:
        case TokenType.Nothrow:
        case TokenType.Override:
        case TokenType.Synchronized:
        case TokenType.AtDisable:
        case TokenType.AtProperty:
        case TokenType.AtSafe:
        case TokenType.AtSystem:
        case TokenType.Abstract:
        case TokenType.Final:
        case TokenType.Gshared:
        case TokenType.Static:
            localAttributes ~= tokens[index++].value;
            break;
        case TokenType.Const:
        case TokenType.Immutable:
        case TokenType.Inout:
        case TokenType.Pure:
        case TokenType.Scope:
        case TokenType.Shared:
            auto tmp = tokens[index++].value;
            if (currentIs(TokenType.LParen))
                type = tmp ~ parenContent(tokens, index);
            else if (currentIs(TokenType.Colon))
            {
                index++;
                attributes ~= tmp;
            }
            // NOTE(review): tmp is recorded as a local attribute even in the
            // "const(T)" and "const:" forms handled above -- confirm that
            // this is intended.
            localAttributes ~= tmp;
            break;
        case TokenType.Align:
        case TokenType.Extern:
            string attribute = tokens[index++].value;
            if (currentIs(TokenType.LParen))
                attribute ~= parenContent(tokens, index);
            if (currentIs(TokenType.LBrace))
                mod.merge(parseModule(betweenBalancedBraces(tokens, index),
                    localProtection.empty() ? protection : localProtection,
                    attributes ~ attribute));
            else if (currentIs(TokenType.Colon))
            {
                ++index;
                attributes ~= attribute;
            }
            else
                localAttributes ~= attribute;
            break;
        case TokenType.PROTECTION_BEGIN: .. case TokenType.PROTECTION_END:
            string p = tokens[index++].value;
            if (currentIs(TokenType.Colon))
            {
                // "public:" style label: applies to everything that follows.
                protection = p;
                ++index;
            }
            else if (currentIs(TokenType.LBrace))
                mod.merge(parseModule(betweenBalancedBraces(tokens, index),
                    p, attributes ~ localAttributes));
            else
                localProtection = p;
            break;
        case TokenType.Module:
            ++index;
            while (index < tokens.length && tokens[index] != TokenType.Semicolon)
                mod.name ~= tokens[index++].value;
            ++index;
            resetLocals();
            break;
        case TokenType.Union:
            mod.unions ~= parseUnion(tokens, index,
                localProtection.empty() ? protection : localProtection,
                localAttributes ~ attributes);
            resetLocals();
            break;
        case TokenType.Class:
            mod.classes ~= parseClass(tokens, index,
                localProtection.empty() ? protection : localProtection,
                localAttributes ~ attributes);
            resetLocals();
            break;
        case TokenType.Interface:
            mod.interfaces ~= parseInterface(tokens, index,
                localProtection.empty() ? protection : localProtection,
                localAttributes ~ attributes);
            resetLocals();
            break;
        case TokenType.Struct:
            mod.structs ~= parseStruct(tokens, index,
                localProtection.empty() ? protection : localProtection,
                localAttributes ~ attributes);
            resetLocals();
            break;
        case TokenType.Enum:
            mod.enums ~= parseEnum(tokens, index,
                localProtection.empty() ? protection : localProtection,
                localAttributes ~ attributes);
            resetLocals();
            break;
        case TokenType.Template:
            ++index; // template
            ++index; // name
            if (currentIs(TokenType.LParen))
                tokens.betweenBalancedParens(index); // params
            if (currentIs(TokenType.LBrace))
                tokens.betweenBalancedBraces(index); // body
            resetLocals();
            break;
        case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END:
        case TokenType.Auto:
        case TokenType.Identifier:
            if (type.empty())
            {
                // First word of a declaration: treat it as the type.
                type = tokens.parseTypeDeclaration(index);
            }
            else
            {
                // A type has already been seen, so this token is the name of
                // a function (if followed by '(') or of a variable.
                name = tokens[index++].value;
                if (index >= tokens.length) break;
                if (tokens[index] == TokenType.LParen)
                {
                    mod.functions ~= parseFunction(tokens, index, type, name,
                        tokens[index].lineNumber,
                        localProtection.empty() ? protection : localProtection,
                        attributes ~ localAttributes);
                }
                else
                {
                    Variable v = new Variable;
                    v.name = name;
                    v.type = type;
                    v.attributes = localAttributes ~ attributes;
                    v.protection = localProtection.empty() ? protection : localProtection;
                    v.line = tokens[index].lineNumber;
                    mod.variables ~= v;
                }
                resetLocals();
            }
            break;
        case TokenType.Unittest:
            ++index;
            if (currentIs(TokenType.LBrace))
                tokens.skipBlockStatement(index);
            resetLocals();
            break;
        case TokenType.Tilde:
            ++index;
            if (currentIs(TokenType.This))
            {
                // Destructor: continue with the constructor handling below,
                // with the name prefixed by "~".
                name = "~";
                goto case;
            }
            break;
        case TokenType.This:
            name ~= tokens[index++].value;
            if (currentIs(TokenType.LParen))
            {
                mod.functions ~= parseFunction(tokens, index, "", name,
                    tokens[index - 1].lineNumber,
                    localProtection.empty() ? protection : localProtection,
                    localAttributes ~ attributes);
            }
            resetLocals();
            break;
        default:
            ++index;
            break;
        }
    }
    return mod;
}
/**
 * Parses an import statement
 * Params:
 *     tokens = the token array to examine
 *     index = index of the import keyword. On return it refers to the token
 *         after the semicolon that ends the statement, or is at or past
 *         tokens.length if the statement is not terminated.
 * Returns: only the module names that were imported, not which symbols were
 *     selectively imported. For a renamed import such as
 *     "import io = std.stdio;" the module name (std.stdio) is returned, not
 *     the local alias.
 */
string[] parseImports(const Token[] tokens, ref size_t index)
{
    assert(tokens[index] == TokenType.Import);
    ++index;
    auto app = appender!(string[])();
    // Text of the module name that is currently being collected.
    string im;
    while (index < tokens.length)
    {
        switch(tokens[index].type)
        {
        case TokenType.Comma:
            ++index;
            app.put(im);
            im = "";
            break;
        case TokenType.Assign:
            // Renamed import: what has been collected so far is the local
            // alias. Discard it and collect the module name that follows.
            ++index;
            im = "";
            break;
        case TokenType.Semicolon:
            app.put(im);
            ++index;
            return app.data;
        case TokenType.Colon:
            // Selective import: record the module and skip the symbol list.
            app.put(im);
            tokens.skipBlockStatement(index);
            return app.data;
        default:
            im ~= tokens[index++].value;
            break;
        }
    }
    return app.data;
}
/**
 * Parses an enum declaration
 * Params:
 *     tokens = the token array to examine
 *     index = index of the enum keyword. On return it refers to the token
 *         after the enum body, or after the terminating semicolon if the
 *         declaration has no body.
 *     protection = currently unused
 *     attributes = currently unused
 * Returns: the parsed enum. The name is left empty for anonymous enums and
 *     the type is "uint" unless a base type is given after a colon.
 */
Enum parseEnum(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
in
{
    assert (tokens[index] == TokenType.Enum);
}
body
{
    ++index;
    Enum e = new Enum;
    // NOTE(review): the D specification gives int as the default enum base
    // type; "uint" is kept here to preserve existing output -- confirm.
    e.type = "uint";
    if (index >= tokens.length)
        return e;
    e.line = tokens[index].lineNumber;
    // Anonymous enums ("enum { a, b }" or "enum : int { a, b }") have no name
    // token; do not mistake the brace or colon for one.
    if (tokens[index] != TokenType.LBrace && tokens[index] != TokenType.Colon)
        e.name = tokens[index++].value;
    if (index < tokens.length && tokens[index] == TokenType.Colon)
    {
        ++index;
        if (index < tokens.length)
            e.type = tokens[index++].value;
    }
    if (index >= tokens.length)
        return e;
    if (tokens[index] != TokenType.LBrace)
    {
        // No body, e.g. "enum a = 1;": skip to the end of the statement.
        tokens.skipBlockStatement(index);
        return e;
    }
    auto r = betweenBalancedBraces(tokens, index);
    for (size_t i = 0; i < r.length;)
    {
        if (r[i].type == TokenType.Identifier)
        {
            EnumMember member;
            member.line = r[i].lineNumber;
            member.name = r[i].value;
            e.members ~= member;
            // Ignore an optional initializer and move on to the next member.
            r.skipPastNext(TokenType.Comma, i);
        }
        else
            ++i;
    }
    return e;
}
/**
 * Parses a function declaration
 * Params:
 *     tokens = the token array to examine
 *     index = index of the left parenthesis that follows the function name.
 *         On return it refers to the token after the function body, or after
 *         the semicolon of a body-less declaration.
 *     type = the return type
 *     name = the function name
 *     line = the line number to record for the function
 *     protection = currently unused
 *     attributes = attributes that were found in front of the declaration
 * Returns: the parsed function
 */
Function parseFunction(const Token[] tokens, ref size_t index, string type,
    string name, uint line, string protection, string[] attributes)
in
{
    assert (tokens[index] == TokenType.LParen);
}
body
{
    Function f = new Function;
    f.name = name;
    f.returnType = type;
    f.line = line;
    // NOTE(review): protection is not stored on f, unlike in parseInherits
    // and parseStructOrUnion -- confirm whether Function should carry it.
    f.attributes.insertInPlace(f.attributes.length, attributes);
    Variable[] vars1 = parseParameters(tokens, index);
    if (index < tokens.length && tokens[index] == TokenType.LParen)
    {
        // Two parameter lists: the first one holds the template parameters.
        f.templateParameters.insertInPlace(f.templateParameters.length,
            map!("a.type")(vars1));
        f.parameters.insertInPlace(f.parameters.length,
            parseParameters(tokens, index));
    }
    else
        f.parameters.insertInPlace(f.parameters.length, vars1);
    attributeLoop: while(index < tokens.length)
    {
        switch (tokens[index].type)
        {
        case TokenType.Immutable:
        case TokenType.Const:
        case TokenType.Pure:
        case TokenType.AtTrusted:
        case TokenType.AtProperty:
        case TokenType.Nothrow:
        case TokenType.Final:
        case TokenType.Override:
            f.attributes ~= tokens[index++].value;
            break;
        default:
            break attributeLoop;
        }
    }
    if (index < tokens.length && tokens[index] == TokenType.If)
        f.constraint = parseConstraint(tokens, index);
    // Skip over the in/out contracts and the body.
    while (index < tokens.length &&
        (tokens[index] == TokenType.In || tokens[index] == TokenType.Out
        || tokens[index] == TokenType.Body))
    {
        ++index;
        // "out (result)" names the return value in front of its block.
        if (index < tokens.length && tokens[index] == TokenType.LParen)
            tokens.betweenBalancedParens(index);
        if (index < tokens.length && tokens[index] == TokenType.LBrace)
            tokens.skipBlockStatement(index);
    }
    if (index >= tokens.length)
        return f;
    if (tokens[index] == TokenType.LBrace)
        tokens.skipBlockStatement(index);
    else if (tokens[index] == TokenType.Semicolon)
        ++index;
    return f;
}
/**
 * Parses a template constraint.
 * Params:
 *     tokens = the token array to examine
 *     index = index of the if keyword, which must be followed by a left
 *         parenthesis. On return it refers to the token after the
 *         parenthesized condition.
 * Returns: the constraint as a string of the form "if (...)"
 */
string parseConstraint(const Token[] tokens, ref size_t index)
{
    assert(tokens[index] == TokenType.If);
    ++index;
    assert(tokens[index] == TokenType.LParen);
    return "if " ~ parenContent(tokens, index);
}
/**
 * Parses a parenthesized parameter list.
 * Params:
 *     tokens = the token array to examine
 *     index = index of the left parenthesis that opens the list. On return it
 *         refers to the token after the matching right parenthesis.
 * Returns: one Variable per parameter. Default values and template
 *     specializations (everything after a colon) are skipped.
 */
Variable[] parseParameters(const Token[] tokens, ref size_t index)
in
{
    assert (tokens[index] == TokenType.LParen);
}
body
{
    auto params = appender!(Variable[])();
    // The parameter that is currently being assembled.
    Variable v = new Variable;
    auto r = betweenBalancedParens(tokens, index);
    size_t i = 0;
    while (i < r.length)
    {
        switch(r[i].type)
        {
        case TokenType.In:
        case TokenType.Out:
        case TokenType.Ref:
        case TokenType.Scope:
        case TokenType.Lazy:
        case TokenType.Const:
        case TokenType.Immutable:
        case TokenType.Shared:
        case TokenType.Inout:
            auto tmp = r[i++].value;
            // "const(T)" is part of the type, a plain "const" is an
            // attribute. The keyword may also be the last token of the list.
            if (i < r.length && r[i] == TokenType.LParen)
                v.type ~= tmp ~ parenContent(r, i);
            else
                v.attributes ~= tmp;
            break;
        case TokenType.Colon:
            // Skip whatever follows the colon, up to the next parameter.
            i++;
            r.skipPastNext(TokenType.Comma, i);
            params.put(v);
            v = new Variable;
            break;
        case TokenType.Comma:
            ++i;
            params.put(v);
            v = new Variable;
            break;
        default:
            if (v.type.empty())
            {
                v.type = r.parseTypeDeclaration(i);
                // A type without a name at the very end of the list.
                if (i >= r.length)
                    params.put(v);
            }
            else
            {
                v.line = r[i].lineNumber;
                v.name = r[i++].value;
                params.put(v);
                if (i < r.length && r[i] == TokenType.Vararg)
                {
                    v.type ~= " ...";
                }
                v = new Variable;
                // Skip a default value, if any.
                r.skipPastNext(TokenType.Comma, i);
            }
            break;
        }
    }
    return params.data;
}
/**
 * Parses the list of base classes and interfaces of a class or interface.
 * Params:
 *     tokens = the token array to examine
 *     index = index of the colon that introduces the list. On return it
 *         refers to the first token after the list.
 * Returns: the names of the base types, in declaration order
 */
string[] parseBaseClassList(const Token[] tokens, ref size_t index)
in
{
    assert(tokens[index] == TokenType.Colon);
}
body
{
    auto bases = appender!(string[])();
    ++index;
    while (index < tokens.length)
    {
        if (tokens[index] == TokenType.Identifier)
        {
            string base = parseTypeDeclaration(tokens, index);
            bases.put(base);
            // The list may end together with the input.
            if (index < tokens.length && tokens[index] == TokenType.Comma)
                ++index;
            else
                break;
        }
        else
            break;
    }
    return bases.data;
}
/**
 * Parses the brace-delimited body that starts at tokens[index] and stores the
 * result in st: the start indexes of the opening and closing brace tokens are
 * recorded as bodyStart and bodyEnd, and the functions and variables found in
 * the body are inserted at the front of st.functions and st.variables.
 */
void parseStructBody(const Token[] tokens, ref size_t index, Struct st)
{
    // Must be read before index is moved past the body.
    st.bodyStart = tokens[index].startIndex;
    const(Token)[] inner = betweenBalancedBraces(tokens, index);
    Module parsed = parseModule(inner);
    // index now refers to the token after the closing brace.
    st.bodyEnd = tokens[index - 1].startIndex;
    st.functions.insertInPlace(0, parsed.functions);
    st.variables.insertInPlace(0, parsed.variables);
}
/**
 * Parses a struct or union declaration.
 * Params:
 *     tokens = the token array to examine
 *     index = index of the token that follows the struct or union keyword.
 *         On return it refers to the token after the declaration.
 *     protection = the protection level to record
 *     attributes = the attributes to record
 * Returns: the parsed declaration. The name is left empty for anonymous
 *     structs and unions.
 */
Struct parseStructOrUnion(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
{
    Struct s = new Struct;
    s.attributes = attributes;
    s.protection = protection;
    if (index >= tokens.length) return s;
    s.line = tokens[index].lineNumber;
    // Anonymous structs and unions go straight to the body; do not mistake
    // the opening brace for a name.
    if (tokens[index] != TokenType.LBrace)
        s.name = tokens[index++].value;
    if (index < tokens.length && tokens[index] == TokenType.LParen)
        s.templateParameters.insertInPlace(s.templateParameters.length,
            map!("a.type")(parseParameters(tokens, index)));
    if (index >= tokens.length) return s;
    if (tokens[index] == TokenType.If)
        s.constraint = parseConstraint(tokens, index);
    if (index >= tokens.length) return s;
    if (tokens[index] == TokenType.LBrace)
        parseStructBody(tokens, index, s);
    else
        tokens.skipBlockStatement(index);
    return s;
}
/**
 * Parses a struct declaration. tokens[index] must be the struct keyword; it
 * is stepped over before the work is handed to parseStructOrUnion.
 * See_also: parseStructOrUnion
 */
Struct parseStruct(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
in
{
    assert(tokens[index] == TokenType.Struct);
}
body
{
    ++index;
    return parseStructOrUnion(tokens, index, protection, attributes);
}
/**
 * Parses a union declaration. tokens[index] must be the union keyword; it is
 * stepped over before the work is handed to parseStructOrUnion.
 * See_also: parseStructOrUnion
 */
Struct parseUnion(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
in
{
    assert(tokens[index] == TokenType.Union);
}
body
{
    ++index;
    return parseStructOrUnion(tokens, index, protection, attributes);
}
/**
 * Parses a class or interface declaration.
 * Params:
 *     tokens = the token array to examine
 *     index = index of the name that follows the class or interface keyword.
 *         On return it refers to the token after the declaration.
 *     protection = the protection level to record
 *     attributes = the attributes to record
 * Returns: the parsed declaration, including its template parameters,
 *     constraint, base classes and body, as far as they are present
 */
Inherits parseInherits(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
{
    auto i = new Inherits;
    i.protection = protection;
    i.attributes.insertInPlace(i.attributes.length, attributes);
    // The keyword may be the last token of the input.
    if (index >= tokens.length) return i;
    i.line = tokens[index].lineNumber;
    i.name = tokens[index++].value;
    if (index < tokens.length && tokens[index] == TokenType.LParen)
        i.templateParameters.insertInPlace(i.templateParameters.length,
            map!("a.type")(parseParameters(tokens, index)));
    if (index >= tokens.length) return i;
    if (tokens[index] == TokenType.If)
        i.constraint = parseConstraint(tokens, index);
    if (index >= tokens.length) return i;
    if (tokens[index] == TokenType.Colon)
        i.baseClasses = parseBaseClassList(tokens, index);
    if (index >= tokens.length) return i;
    if (tokens[index] == TokenType.LBrace)
        parseStructBody(tokens, index, i);
    else
        tokens.skipBlockStatement(index);
    return i;
}
/**
 * Parses an interface declaration. tokens[index] must be the interface
 * keyword; it is stepped over before the work is handed to parseInherits.
 * See_also: parseInherits
 */
Inherits parseInterface(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
in
{
    assert (tokens[index] == TokenType.Interface);
}
body
{
    ++index;
    return parseInherits(tokens, index, protection, attributes);
}
/**
 * Parses a class declaration. tokens[index] must be the class keyword; it is
 * stepped over before the work is handed to parseInherits.
 * See_also: parseInherits
 */
Inherits parseClass(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
in
{
    assert(tokens[index] == TokenType.Class);
}
body
{
    ++index;
    return parseInherits(tokens, index, protection, attributes);
}