D-Scanner/parser.d

812 lines
19 KiB
D

// Copyright Brian Schott (Sir Alaran) 2012.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module parser;
import std.stream;
import std.array;
import std.stdio;
import std.algorithm;
import types, tokenizer;
import langutils;
/**
 * Extracts the tokens enclosed by a pair of balanced delimiters.
 *
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens such that tokens[index].type == open. On
 *         return it indexes the token directly after the matching closing
 *         delimiter, or equals tokens.length if no match was found.
 *     open = the opening delimiter
 *     close = the closing delimiter
 * Returns: all tokens that are between the balanced delimiters that start at
 *     tokens[index], not including the delimiters. If the delimiters in tokens
 *     are not balanced, this function will return tokens[index + 1 .. $].
 */
const(Token)[] betweenBalanced(const Token[] tokens, ref size_t index, TokenType open,
    TokenType close)
in
{
    assert (tokens[index] == open);
}
body
{
    ++index;
    immutable size_t start = index;
    int depth = 1;
    while (depth > 0 && index < tokens.length)
    {
        if (tokens[index] == open) ++depth;
        else if (tokens[index] == close) --depth;
        ++index;
    }
    // Balanced: index is one past the closing delimiter, which is excluded
    // from the result. Unbalanced: the scan ran off the end, so there is no
    // closing delimiter to drop and everything after the opener is returned.
    // Handling the two cases separately also avoids an invalid slice
    // (start > index - 1) when the opener is the last token.
    return depth == 0 ? tokens[start .. index - 1] : tokens[start .. index];
}
/**
 * Convenience form of betweenBalanced for curly braces.
 * See_also: betweenBalanced
 */
const(Token)[] betweenBalancedBraces(const Token[] tokens, ref size_t index)
{
    return tokens.betweenBalanced(index, TokenType.lBrace, TokenType.rBrace);
}
/**
 * Convenience form of betweenBalanced for parentheses.
 * See_also: betweenBalanced
 */
const(Token)[] betweenBalancedParens(const Token[] tokens, ref size_t index)
{
    return tokens.betweenBalanced(index, TokenType.lParen, TokenType.rParen);
}
/**
 * Convenience form of betweenBalanced for square brackets.
 * See_also: betweenBalanced
 */
const(Token)[] betweenBalancedBrackets(const Token[] tokens, ref size_t index)
{
    return tokens.betweenBalanced(index, TokenType.lBracket, TokenType.rBracket);
}
/**
 * Moves index across a group delimited by Op and Cl.
 *
 * If tokens[index] equals Op the scan walks forward, otherwise it walks
 * backward. The scan stops once the nesting counter returns to zero, or when
 * index reaches zero or runs past the end of tokens.
 *
 * Params:
 *     Op = the opening delimiter token type
 *     Cl = the closing delimiter token type
 *     tokens = the token array to scan
 *     index = the starting position; updated in place
 */
void skipBalanced(alias Op, alias Cl)(const Token[] tokens, ref size_t index)
{
    // +1 when starting on the opening delimiter, -1 otherwise; the same value
    // doubles as the direction of travel.
    int nesting = tokens[index] == Op ? 1 : -1;
    int step = nesting;
    index += step;
    while (index < tokens.length && index > 0 && nesting != 0)
    {
        const current = tokens[index].type;
        if (current == Op)
            ++nesting;
        else if (current == Cl)
            --nesting;
        index += step;
    }
}
/**
 * Skips a parenthesized group using skipBalanced.
 * See_also: skipBalanced
 */
void skipParens(const Token[] tokens, ref size_t index)
{
    tokens.skipBalanced!(TokenType.lParen, TokenType.rParen)(index);
}
/**
 * Skips a square-bracketed group using skipBalanced.
 * See_also: skipBalanced
 */
void skipBrackets(const Token[] tokens, ref size_t index)
{
    tokens.skipBalanced!(TokenType.lBracket, TokenType.rBracket)(index);
}
/**
 * Builds a string from the tokens enclosed by a pair of balanced delimiters.
 *
 * Params:
 *     tokens = the token array to examine
 *     index = an index into tokens such that tokens[index].type == open. On
 *         return it indexes the token after the matching closing delimiter,
 *         or equals tokens.length if no match was found.
 *     open = the opening delimiter
 *     close = the closing delimiter
 * Returns: a string representing the contents of the two delimiters, not
 *     including the outermost delimiters themselves. This will not preserve
 *     whitespace, but it will place a single space character after a comma
 *     and between adjacent identifiers. Nested delimiters are kept.
 */
string content(const Token[] tokens, ref size_t index, TokenType open, TokenType close)
in
{
    assert (tokens[index] == open);
}
body
{
    index++;
    auto app = appender!string();
    int depth = 1;
    // Whether the previously visited token was an identifier; two identifiers
    // in a row would otherwise be fused into a single word.
    bool afterIdentifier = false;
    while (depth > 0 && index < tokens.length)
    {
        const bool isIdentifier = tokens[index] == TokenType.identifier;
        if (tokens[index] == open)
        {
            ++depth;
            // A nested opener is part of the content.
            app.put(tokens[index].value);
        }
        else if (tokens[index] == close)
        {
            --depth;
            // Only the outermost closer is excluded from the content.
            if (depth > 0)
                app.put(tokens[index].value);
        }
        else if (tokens[index] == TokenType.comma)
        {
            app.put(", ");
        }
        else
        {
            if (isIdentifier && afterIdentifier)
                app.put(" ");
            app.put(tokens[index].value);
        }
        afterIdentifier = isIdentifier;
        ++index;
    }
    return app.data;
}
/**
 * Returns the content of a parenthesized group, wrapped in "(" and ")".
 * See_also: content
 */
string parenContent(const Token[]tokens, ref size_t index)
{
    string inner = content(tokens, index, TokenType.lParen, TokenType.rParen);
    return "(" ~ inner ~ ")";
}
/**
 * Returns the content of a square-bracketed group, wrapped in "[" and "]".
 * See_also: content
 */
string bracketContent(const Token[]tokens, ref size_t index)
{
    string inner = content(tokens, index, TokenType.lBracket, TokenType.rBracket);
    return "[" ~ inner ~ "]";
}
/**
 * Advances index until it indexes the token after the matching right brace if
 * index initially indexed a left brace, or advances index until it indexes the
 * token after the next semicolon otherwise.
 *
 * Does nothing when index is already at or past the end of tokens.
 */
void skipBlockStatement(const Token[] tokens, ref size_t index)
{
    // Callers routinely advance index before calling; tolerate end of input.
    if (index >= tokens.length)
        return;
    if (tokens[index] == TokenType.lBrace)
        betweenBalancedBraces(tokens, index);
    else
        skipPastNext(tokens, TokenType.semicolon, index);
}
/**
 * Moves index to the token directly following the next token of the given
 * type. Brace, parenthesis and bracket groups met on the way are stepped over
 * as a whole, so a matching token nested inside them is not considered. If no
 * such token exists, index ends at tokens.length.
 */
void skipPastNext(const Token[] tokens, TokenType type, ref size_t index)
{
    while (index < tokens.length)
    {
        switch (tokens[index].type)
        {
        case TokenType.lBrace:
            betweenBalancedBraces(tokens, index);
            break;
        case TokenType.lParen:
            betweenBalancedParens(tokens, index);
            break;
        case TokenType.lBracket:
            betweenBalancedBrackets(tokens, index);
            break;
        default:
            // Either way the current token is consumed; stop if it matched.
            const bool found = tokens[index].type == type;
            ++index;
            if (found)
                return;
            break;
        }
    }
}
/**
 * Builds a type string starting from the token at index.
 *
 * The first token is taken as the base of the type. Any directly following
 * bracket groups, template instantiations ('!' followed by either a
 * parenthesized argument list or a single token), '*' and '&' tokens are
 * appended to it.
 *
 * Params:
 *     tokens = the token array to examine
 *     index = index of the first token of the type; must be a valid index.
 *         On return it indexes the first token that is not part of the type.
 * Returns: the assembled type string
 */
string parseTypeDeclaration(const Token[] tokens, ref size_t index)
{
    auto type = tokens[index++].value.idup;
    buildingType: while (index < tokens.length)
    {
        switch (tokens[index].type)
        {
        case TokenType.lBracket:
            type ~= bracketContent(tokens, index);
            break;
        case TokenType.not:
            type ~= tokens[index++].value;
            // A trailing '!' has no argument to read.
            if (index >= tokens.length)
                break buildingType;
            if (tokens[index] == TokenType.lParen)
                type ~= parenContent(tokens, index);
            else
                type ~= tokens[index++].value;
            break;
        case TokenType.star:
        case TokenType.bitAnd:
            type ~= tokens[index++].value;
            break;
        default:
            break buildingType;
        }
    }
    return type;
}
/**
 * Parses a module from a token array.
 *
 * The token array is walked once. Declarations that are recognized (imports,
 * unions, classes, interfaces, structs, enums, functions and variables) are
 * collected in the returned Module; attribute and protection keywords are
 * remembered and applied to the declaration that follows them.
 *
 * Params:
 *     tokens = the tokens of the module, or of a block inside a module
 *     protection = the default protection level for a block statement
 *     attributes = the default attributes for a block statement
 * Returns: the parsed module
 */
Module parseModule(const Token[] tokens, string protection = "public", string[] attributes = [])
{
    // State accumulated for the declaration currently being read.
    string type;
    string name;
    string localProtection = "";
    string[] localAttributes = [];

    void resetLocals()
    {
        type = "";
        name = "";
        localProtection = "";
        localAttributes = [];
    }

    Module mod = new Module;
    size_t index = 0;

    // True when index is in range and the token there has the given type.
    // Used wherever index has just been advanced and may be past the end.
    bool currentIs(TokenType t)
    {
        return index < tokens.length && tokens[index] == t;
    }

    while(index < tokens.length)
    {
        switch(tokens[index].type)
        {
        case TokenType.tElse:
        case TokenType.tMixin:
        case TokenType.tAssert:
            ++index;
            if (index < tokens.length)
                tokens.skipBlockStatement(index);
            break;
        case TokenType.tAlias:
            tokens.skipBlockStatement(index);
            break;
        case TokenType.tImport:
            mod.imports ~= parseImports(tokens, index);
            resetLocals();
            break;
        case TokenType.tVersion:
            ++index;
            if (currentIs(TokenType.lParen))
            {
                tokens.betweenBalancedParens(index);
                if (currentIs(TokenType.lBrace))
                    mod.merge(parseModule(betweenBalancedBraces(tokens, index),
                        localProtection.empty() ? protection : localProtection,
                        attributes));
            }
            else if (currentIs(TokenType.assign))
                tokens.skipBlockStatement(index);
            break;
        case TokenType.atDisable:
        case TokenType.atProperty:
        case TokenType.atSafe:
        case TokenType.atSystem:
        case TokenType.tAbstract:
        case TokenType.tConst:
        case TokenType.tDeprecated:
        case TokenType.tExtern:
        case TokenType.tFinal:
        case TokenType.t__gshared:
        case TokenType.tImmutable:
        case TokenType.tInout:
        case TokenType.tNothrow:
        case TokenType.tOverride:
        case TokenType.tPure:
        case TokenType.tScope:
        case TokenType.tShared:
        case TokenType.tStatic:
        case TokenType.tSynchronized:
            auto tmp = tokens[index++].value;
            if (currentIs(TokenType.lParen))
                // e.g. const(...): the keyword starts a type
                type = tmp ~ parenContent(tokens, index);
            else if (currentIs(TokenType.colon))
            {
                // "attribute:" applies to everything that follows
                index++;
                attributes ~= tmp;
            }
            else
                localAttributes ~= tmp;
            break;
        case TokenType.tAlign:
            string attribute = tokens[index++].value;
            if (currentIs(TokenType.lParen))
                attribute ~= parenContent(tokens, index);
            if (currentIs(TokenType.lBrace))
                mod.merge(parseModule(betweenBalancedBraces(tokens, index),
                    localProtection.empty() ? protection : localProtection,
                    attributes ~ attribute));
            else if (currentIs(TokenType.colon))
            {
                ++index;
                attributes ~= attribute;
            }
            else
                localAttributes ~= attribute;
            break;
        case TokenType.PROTECTION_BEGIN: .. case TokenType.PROTECTION_END:
            string p = tokens[index++].value;
            if (currentIs(TokenType.colon))
            {
                // "protection:" changes the default for what follows
                protection = p;
                ++index;
            }
            else if (currentIs(TokenType.lBrace))
                mod.merge(parseModule(betweenBalancedBraces(tokens, index),
                    p, attributes ~ localAttributes));
            else
                localProtection = p;
            break;
        case TokenType.tModule:
            ++index;
            while (index < tokens.length && tokens[index] != TokenType.semicolon)
                mod.name ~= tokens[index++].value;
            ++index;
            resetLocals();
            break;
        case TokenType.tUnion:
            mod.unions ~= parseUnion(tokens, index,
                localProtection.empty() ? protection : localProtection,
                localAttributes ~ attributes);
            resetLocals();
            break;
        case TokenType.tClass:
            mod.classes ~= parseClass(tokens, index,
                localProtection.empty() ? protection : localProtection,
                localAttributes ~ attributes);
            resetLocals();
            break;
        case TokenType.tInterface:
            mod.interfaces ~= parseInterface(tokens, index,
                localProtection.empty() ? protection : localProtection,
                localAttributes ~ attributes);
            resetLocals();
            break;
        case TokenType.tStruct:
            mod.structs ~= parseStruct(tokens, index,
                localProtection.empty() ? protection : localProtection,
                localAttributes ~ attributes);
            resetLocals();
            break;
        case TokenType.tEnum:
            mod.enums ~= parseEnum(tokens, index,
                localProtection.empty() ? protection : localProtection,
                localAttributes ~ attributes);
            resetLocals();
            break;
        case TokenType.tTemplate:
            ++index; // template
            ++index; // name
            if (currentIs(TokenType.lParen))
                tokens.betweenBalancedParens(index); // params
            if (currentIs(TokenType.lBrace))
                tokens.betweenBalancedBraces(index); // body
            resetLocals();
            break;
        case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END:
        case TokenType.tAuto:
        case TokenType.identifier:
            if (type.empty())
            {
                // First type-like token of a declaration: read the type.
                type = tokens.parseTypeDeclaration(index);
            }
            else
            {
                // A type has already been read, so this token is the name.
                name = tokens[index++].value;
                if (index >= tokens.length) break;
                if (tokens[index] == TokenType.lParen)
                {
                    mod.functions ~= parseFunction(tokens, index, type, name,
                        tokens[index].lineNumber,
                        localProtection.empty() ? protection : localProtection,
                        attributes ~ localAttributes);
                }
                else
                {
                    Variable v = new Variable;
                    v.name = name;
                    v.type = type;
                    v.attributes = localAttributes ~ attributes;
                    v.protection = localProtection.empty() ? protection : localProtection;
                    v.line = tokens[index].lineNumber;
                    mod.variables ~= v;
                }
                resetLocals();
            }
            break;
        case TokenType.tUnittest:
            ++index;
            if (currentIs(TokenType.lBrace))
                tokens.skipBlockStatement(index);
            resetLocals();
            break;
        case TokenType.tilde:
            ++index;
            if (currentIs(TokenType.tThis))
            {
                // Destructor: continue with the constructor handling below.
                name = "~";
                goto case;
            }
            break;
        case TokenType.tThis:
            name ~= tokens[index++].value;
            if (currentIs(TokenType.lParen))
            {
                mod.functions ~= parseFunction(tokens, index, "", name,
                    tokens[index - 1].lineNumber,
                    localProtection.empty() ? protection : localProtection,
                    localAttributes ~ attributes);
            }
            resetLocals();
            break;
        default:
            ++index;
            break;
        }
    }
    return mod;
}
/**
 * Parses an import statement.
 *
 * Params:
 *     tokens = the token array to examine
 *     index = index of the import keyword. On return it indexes the token
 *         after the statement's semicolon, or equals tokens.length if the
 *         statement is not terminated.
 * Returns: only the module names that were imported, not which symbols were
 *     selectively imported. For a renamed import ("import io = std.stdio;")
 *     the module name is returned, not the alias.
 */
string[] parseImports(const Token[] tokens, ref size_t index)
{
    assert(tokens[index] == TokenType.tImport);
    ++index;
    auto app = appender!(string[])();
    string im;
    while (index < tokens.length)
    {
        switch(tokens[index].type)
        {
        case TokenType.comma:
            ++index;
            app.put(im);
            im = "";
            break;
        case TokenType.assign:
            // What was collected so far is the local alias of a renamed
            // import; discard it and collect the module name that follows.
            ++index;
            im = "";
            break;
        case TokenType.semicolon:
            app.put(im);
            ++index;
            return app.data;
        case TokenType.colon:
            // Selective import: keep the module, skip the symbol list.
            app.put(im);
            tokens.skipBlockStatement(index);
            return app.data;
        default:
            im ~= tokens[index++].value;
            break;
        }
    }
    // Input ended before the terminating semicolon; keep what was read.
    if (im.length > 0)
        app.put(im);
    return app.data;
}
/**
 * Parses an enum declaration.
 *
 * Params:
 *     tokens = the token array to examine
 *     index = index of the enum keyword. On return it indexes the token
 *         after the enum body, or after the terminating semicolon when the
 *         declaration has no brace-delimited body.
 *     protection = accepted for symmetry with the other parse functions; not
 *         stored by this function
 *     attributes = accepted for symmetry with the other parse functions; not
 *         stored by this function
 * Returns: the parsed enum. Its name is left empty for an anonymous enum.
 */
Enum parseEnum(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
in
{
    assert (tokens[index] == TokenType.tEnum);
}
body
{
    ++index;
    Enum e = new Enum;
    if (index >= tokens.length)
        return e;
    e.line = tokens[index].lineNumber;
    // An anonymous enum goes straight to its base type or body; taking that
    // token as the name would cause the body to be skipped.
    if (tokens[index] != TokenType.lBrace && tokens[index] != TokenType.colon)
        e.name = tokens[index++].value;
    if (index < tokens.length && tokens[index] == TokenType.colon)
    {
        ++index;
        if (index < tokens.length)
            e.type = tokens[index++].value;
    }
    else
        e.type = "uint";
    if (index >= tokens.length)
        return e;
    if (tokens[index] != TokenType.lBrace)
    {
        // No member list follows; skip to the end of the statement.
        tokens.skipBlockStatement(index);
        return e;
    }
    auto r = betweenBalancedBraces(tokens, index);
    for (size_t i = 0; i < r.length;)
    {
        if (r[i].type == TokenType.identifier)
        {
            EnumMember member;
            member.line = r[i].lineNumber;
            member.name = r[i].value;
            e.members ~= member;
            // Skip any initializer up to and including the next comma.
            r.skipPastNext(TokenType.comma, i);
        }
        else
            ++i;
    }
    return e;
}
/**
 * Parses a function declaration.
 *
 * Params:
 *     tokens = the token array to examine
 *     index = index of the left parenthesis that follows the function name.
 *         On return it indexes the token after the function body or after
 *         the terminating semicolon.
 *     type = the return type
 *     name = the function name
 *     line = the line number to record for the function
 *     protection = accepted but not stored by this function
 *     attributes = attributes that precede the declaration
 * Returns: the parsed function
 */
Function parseFunction(const Token[] tokens, ref size_t index, string type,
    string name, uint line, string protection, string[] attributes)
in
{
    assert (tokens[index] == TokenType.lParen);
}
body
{
    Function f = new Function;
    f.name = name;
    f.returnType = type;
    f.line = line;
    f.attributes.insertInPlace(f.attributes.length, attributes);

    Variable[] vars1 = parseParameters(tokens, index);
    if (index < tokens.length && tokens[index] == TokenType.lParen)
    {
        // Two parameter lists: the first holds the template parameters.
        f.templateParameters.insertInPlace(f.templateParameters.length,
            map!("a.type")(vars1));
        f.parameters.insertInPlace(f.parameters.length,
            parseParameters(tokens, index));
    }
    else
        f.parameters.insertInPlace(f.parameters.length, vars1);

    // Attributes written after the parameter list.
    attributeLoop: while(index < tokens.length)
    {
        switch (tokens[index].type)
        {
        case TokenType.tImmutable:
        case TokenType.tConst:
        case TokenType.tPure:
        case TokenType.atTrusted:
        case TokenType.atProperty:
        case TokenType.tNothrow:
        case TokenType.tFinal:
        case TokenType.tOverride:
            f.attributes ~= tokens[index++].value;
            break;
        default:
            break attributeLoop;
        }
    }

    if (index < tokens.length && tokens[index] == TokenType.tIf)
        f.constraint = parseConstraint(tokens, index);

    // Skip in/out/body contract keywords and their blocks.
    while (index < tokens.length &&
        (tokens[index] == TokenType.tIn || tokens[index] == TokenType.tOut
        || tokens[index] == TokenType.tBody))
    {
        ++index;
        if (index < tokens.length && tokens[index] == TokenType.lBrace)
            tokens.skipBlockStatement(index);
    }
    if (index >= tokens.length)
        return f;
    if (tokens[index] == TokenType.lBrace)
        tokens.skipBlockStatement(index);
    else if (tokens[index] == TokenType.semicolon)
        ++index;
    return f;
}
/**
 * Parses a template constraint.
 *
 * Params:
 *     tokens = the token array to examine
 *     index = index of the if keyword, which must be followed by a left
 *         parenthesis. On return it indexes the token after the constraint.
 * Returns: the constraint as "if " followed by its parenthesized content
 */
string parseConstraint(const Token[] tokens, ref size_t index)
{
    assert(tokens[index] == TokenType.tIf);
    ++index;
    assert(index < tokens.length && tokens[index] == TokenType.lParen);
    return "if " ~ parenContent(tokens, index);
}
/**
 * Parses a parenthesized parameter list.
 *
 * Params:
 *     tokens = the token array to examine
 *     index = index of the left parenthesis. On return it indexes the token
 *         after the matching right parenthesis.
 * Returns: one Variable per parameter that was recognized
 */
Variable[] parseParameters(const Token[] tokens, ref size_t index)
in
{
    assert (tokens[index] == TokenType.lParen);
}
body
{
    // Named differently from std.array.appender to avoid shadowing it.
    auto params = appender!(Variable[])();
    Variable v = new Variable;
    auto r = betweenBalancedParens(tokens, index);
    size_t i = 0;
    while (i < r.length)
    {
        switch(r[i].type)
        {
        case TokenType.tIn:
        case TokenType.tOut:
        case TokenType.tRef:
        case TokenType.tScope:
        case TokenType.tLazy:
        case TokenType.tConst:
        case TokenType.tImmutable:
        case TokenType.tShared:
        case TokenType.tInout:
            auto tmp = r[i++].value;
            // The keyword may be the last token of the list.
            if (i < r.length && r[i] == TokenType.lParen)
                v.type ~= tmp ~ parenContent(r, i);
            else
                v.attributes ~= tmp;
            break;
        case TokenType.colon:
            // Skip everything between the colon and the next comma.
            i++;
            r.skipPastNext(TokenType.comma, i);
            params.put(v);
            v = new Variable;
            break;
        case TokenType.comma:
            ++i;
            params.put(v);
            v = new Variable;
            break;
        default:
            if (v.type.empty())
            {
                v.type = r.parseTypeDeclaration(i);
                // Type-only parameter at the end of the list.
                if (i >= r.length)
                    params.put(v);
            }
            else
            {
                v.line = r[i].lineNumber;
                v.name = r[i++].value;
                params.put(v);
                if (i < r.length && r[i] == TokenType.vararg)
                {
                    v.type ~= " ...";
                }
                v = new Variable;
                // Skip a default value, if any, up to the next comma.
                r.skipPastNext(TokenType.comma, i);
            }
            break;
        }
    }
    return params.data;
}
/**
 * Parses the comma-separated list of base types that follows a colon.
 *
 * Params:
 *     tokens = the token array to examine
 *     index = index of the colon. On return it indexes the first token that
 *         is not part of the list.
 * Returns: the base types in declaration order
 */
string[] parseBaseClassList(const Token[] tokens, ref size_t index)
in
{
    assert(tokens[index] == TokenType.colon);
}
body
{
    // Named differently from std.array.appender to avoid shadowing it.
    auto bases = appender!(string[])();
    ++index;
    while (index < tokens.length)
    {
        if (tokens[index] == TokenType.identifier)
        {
            string base = parseTypeDeclaration(tokens, index);
            bases.put(base);
            // The type may have been the last token of the input.
            if (index < tokens.length && tokens[index] == TokenType.comma)
                ++index;
            else
                break;
        }
        else
            break;
    }
    return bases.data;
}
/**
 * Parses a brace-delimited aggregate body and stores the result in st.
 *
 * Records the startIndex of the token at index as st.bodyStart and the
 * startIndex of the last token consumed as st.bodyEnd. The functions and
 * variables found in the body are inserted at the front of st.functions and
 * st.variables.
 *
 * Params:
 *     tokens = the token array to examine
 *     index = index of the body's left brace; advanced past the body
 *     st = the aggregate to fill in
 */
void parseStructBody(const Token[] tokens, ref size_t index, Struct st)
{
    st.bodyStart = tokens[index].startIndex;
    const(Token)[] bodyTokens = betweenBalancedBraces(tokens, index);
    Module inner = parseModule(bodyTokens);
    // index is now one past the last token consumed.
    st.bodyEnd = tokens[index - 1].startIndex;
    st.functions.insertInPlace(0, inner.functions);
    st.variables.insertInPlace(0, inner.variables);
}
/**
 * Parses the part of a struct or union declaration that follows the keyword.
 *
 * Params:
 *     tokens = the token array to examine
 *     index = index of the token after the struct or union keyword. On
 *         return it indexes the token after the declaration.
 *     protection = the protection level to record
 *     attributes = the attributes to record
 * Returns: the parsed aggregate. Its name is left empty when the keyword is
 *     directly followed by a left brace (anonymous struct or union).
 */
Struct parseStructOrUnion(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
{
    Struct s = new Struct;
    s.attributes = attributes;
    s.protection = protection;
    if (index >= tokens.length) return s;
    s.line = tokens[index].lineNumber;
    // Taking '{' as the name would cause the body to be skipped.
    if (tokens[index] != TokenType.lBrace)
        s.name = tokens[index++].value;
    if (index >= tokens.length) return s;
    if (tokens[index] == TokenType.lParen)
        s.templateParameters.insertInPlace(s.templateParameters.length,
            map!("a.type")(parseParameters(tokens, index)));
    if (index >= tokens.length) return s;
    if (tokens[index] == TokenType.tIf)
        s.constraint = parseConstraint(tokens, index);
    if (index >= tokens.length) return s;
    if (tokens[index] == TokenType.lBrace)
        parseStructBody(tokens, index, s);
    else
        tokens.skipBlockStatement(index);
    return s;
}
/**
 * Parses a struct declaration; index must be at the struct keyword.
 * See_also: parseStructOrUnion
 */
Struct parseStruct(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
in
{
    assert(tokens[index] == TokenType.tStruct);
}
body
{
    ++index; // step over the struct keyword
    return parseStructOrUnion(tokens, index, protection, attributes);
}
/**
 * Parses a union declaration; index must be at the union keyword.
 * See_also: parseStructOrUnion
 */
Struct parseUnion(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
in
{
    assert(tokens[index] == TokenType.tUnion);
}
body
{
    ++index; // step over the union keyword
    return parseStructOrUnion(tokens, index, protection, attributes);
}
/**
 * Parses the part of a class or interface declaration that follows the
 * keyword: name, optional template parameters, optional constraint, optional
 * base class list and the body.
 *
 * Params:
 *     tokens = the token array to examine
 *     index = index of the token after the class or interface keyword. On
 *         return it indexes the token after the declaration.
 *     protection = the protection level to record
 *     attributes = the attributes to record
 * Returns: the parsed declaration
 */
Inherits parseInherits(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
{
    auto i = new Inherits;
    i.protection = protection;
    i.attributes.insertInPlace(i.attributes.length, attributes);
    // The keyword may have been the last token of the input.
    if (index >= tokens.length) return i;
    i.line = tokens[index].lineNumber;
    i.name = tokens[index++].value;
    if (index >= tokens.length) return i;
    if (tokens[index] == TokenType.lParen)
        i.templateParameters.insertInPlace(i.templateParameters.length,
            map!("a.type")(parseParameters(tokens, index)));
    if (index >= tokens.length) return i;
    if (tokens[index] == TokenType.tIf)
        i.constraint = parseConstraint(tokens, index);
    if (index >= tokens.length) return i;
    if (tokens[index] == TokenType.colon)
        i.baseClasses = parseBaseClassList(tokens, index);
    if (index >= tokens.length) return i;
    if (tokens[index] == TokenType.lBrace)
        parseStructBody(tokens, index, i);
    else
        tokens.skipBlockStatement(index);
    return i;
}
/**
 * Parses an interface declaration; index must be at the interface keyword.
 * See_also: parseInherits
 */
Inherits parseInterface(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
in
{
    assert (tokens[index] == TokenType.tInterface);
}
body
{
    ++index; // step over the interface keyword
    return parseInherits(tokens, index, protection, attributes);
}
/**
 * Parses a class declaration; index must be at the class keyword.
 * See_also: parseInherits
 */
Inherits parseClass(const Token[] tokens, ref size_t index, string protection,
    string[] attributes)
in
{
    assert(tokens[index] == TokenType.tClass);
}
body
{
    ++index; // step over the class keyword
    return parseInherits(tokens, index, protection, attributes);
}