D-Scanner/parser.d


//          Copyright Brian Schott (Sir Alaran) 2012.
// Distributed under the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE_1_0.txt or copy at
//          http://www.boost.org/LICENSE_1_0.txt)

module parser;

import std.stream;
import std.array;
import std.stdio;
import std.algorithm;

import types, tokenizer;
import langutils;


/**
 * Extracts the tokens enclosed by a pair of balanced delimiters.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens such that tokens[index].type == open. On
 *         return it refers to the token directly after the matching closing
 *         delimiter, or equals tokens.length if no matching delimiter exists.
 *     open = the opening delimiter
 *     close = the closing delimiter
 * Returns: all tokens that are between the balanced delimiters that start at
 *     tokens[index], not including the delimiters. If the delimiters in tokens
 *     are not balanced, every token after the opening delimiter is returned,
 *     i.e. tokens[index + 1 .. $] for the value index had on entry.
 */
const(Token)[] betweenBalanced(const Token[] tokens, ref size_t index, TokenType open,
	TokenType close)
in
{
	assert (tokens[index] == open);
}
body
{
	++index;
	immutable size_t start = index;
	int depth = 1;
	while (depth > 0 && index < tokens.length)
	{
		if (tokens[index] == open) ++depth;
		else if (tokens[index] == close) --depth;
		++index;
	}
	// When the closer was found, tokens[index - 1] is that closer and must be
	// excluded. When the input ran out first there is no closer to exclude;
	// subtracting one would drop a real token (or produce an invalid slice
	// when the opener is the last token).
	immutable size_t end = depth == 0 ? index - 1 : index;
	return tokens[start .. end];
}


/**
 * Convenience form of betweenBalanced for curly braces.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to a left brace
 * See_also: betweenBalanced
 */
const(Token)[] betweenBalancedBraces(const Token[] tokens, ref size_t index)
{
	return tokens.betweenBalanced(index, TokenType.LBrace, TokenType.RBrace);
}


/**
 * Convenience form of betweenBalanced for parentheses.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to a left parenthesis
 * See_also: betweenBalanced
 */
const(Token)[] betweenBalancedParens(const Token[] tokens, ref size_t index)
{
	return tokens.betweenBalanced(index, TokenType.LParen, TokenType.RParen);
}


/**
 * Convenience form of betweenBalanced for square brackets.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to a left bracket
 * See_also: betweenBalanced
 */
const(Token)[] betweenBalancedBrackets(const Token[] tokens, ref size_t index)
{
	return tokens.betweenBalanced(index, TokenType.LBracket, TokenType.RBracket);
}


/**
 * Moves index across a balanced openToken / closeToken group.
 *
 * If tokens[index] is currently openToken, advances index until it refers to a
 * location in tokens directly after the balanced occurrence of closeToken.
 * Otherwise the scan runs backward (the code treats every token that is not
 * openToken as if it were closeToken) and index is left directly before the
 * balancing openToken.
 *
 * The scan also stops, leaving index where it is, once index reaches
 * tokens.length (forward) or 0 (backward). The backward scan therefore never
 * examines tokens[0].
 *
 * NOTE(review): index is a size_t while the step is an int of -1, so starting
 * a backward scan at index 0 looks like it wraps index around instead of
 * stopping - confirm that callers never do this.
 *
 * Params:
 *     openToken = the token type that opens a group
 *     closeToken = the token type that closes a group
 *     tokens = the array of tokens
 *     index = the position to start from; updated in place
 */
void skipBalanced(alias openToken, alias closeToken)(const Token[] tokens, ref size_t index)
{
	// +1 selects a forward scan, -1 a backward scan; the same value is the
	// initial nesting depth and the per-iteration step.
	int depth = tokens[index] == openToken ? 1 : -1;
	int deltaIndex = depth;
	// Step off the delimiter the scan started on.
	index += deltaIndex;
	// The loop increment still runs after depth reaches zero, which is what
	// places index one position beyond the matching delimiter.
	for (; index < tokens.length && index > 0 && depth != 0; index += deltaIndex)
	{
		switch (tokens[index].type)
		{
		case openToken: ++depth; break;
		case closeToken: --depth; break;
		default: break;
		}
	}
}

/**
 * skipBalanced specialised for parentheses.
 * See_also: skipBalanced
 */
void skipParens(const Token[] tokens, ref size_t index)
{
	tokens.skipBalanced!(TokenType.LParen, TokenType.RParen)(index);
}

/**
 * skipBalanced specialised for square brackets.
 * See_also: skipBalanced
 */
void skipBrackets(const Token[] tokens, ref size_t index)
{
	tokens.skipBalanced!(TokenType.LBracket, TokenType.RBracket)(index);
}

/**
 * skipBalanced specialised for curly braces.
 * See_also: skipBalanced
 */
void skipBraces(const Token[] tokens, ref size_t index)
{
	tokens.skipBalanced!(TokenType.LBrace, TokenType.RBrace)(index);
}

/**
 * Renders the tokens enclosed by a pair of balanced delimiters as text.
 * Params:
 *     tokens = the token array to examine
 *     index = an index into tokens such that tokens[index].type == open. On
 *         return it refers to the token directly after the matching closing
 *         delimiter, or equals tokens.length if no matching delimiter exists.
 *     open = the opening delimiter
 *     close = the closing delimiter
 * Returns: a string representing the contents of the two delimiters, not
 *     including the outermost delimiters themselves. Whitespace is not
 *     preserved: token values are concatenated directly, except that every
 *     comma is written as a comma followed by a single space. Delimiters
 *     nested inside the outermost pair are kept.
 */
string content(const Token[] tokens, ref size_t index, TokenType open, TokenType close)
in
{
	assert (tokens[index] == open);
}
body
{
	index++;
	auto app = appender!string();
	int depth = 1;
	while (depth > 0 && index < tokens.length)
	{
		if (tokens[index] == open)
		{
			// A nested opener is part of the content, e.g. the inner "[" of
			// int[string[]]; dropping it would change the meaning of the text.
			++depth;
			app.put(tokens[index].value);
		}
		else if (tokens[index] == close)
		{
			--depth;
			// Only the outermost closer (depth == 0) is left out.
			if (depth > 0)
				app.put(tokens[index].value);
		}
		else if (tokens[index] == TokenType.Comma)
			app.put(", ");
		else
			app.put(tokens[index].value);
		++index;
	}
	return app.data;
}


/**
 * Renders a parenthesised token group as text, including the surrounding
 * parentheses.
 * Params:
 *     tokens = the token array to examine
 *     index = an index into tokens that refers to a left parenthesis
 * See_also: content
 */
string parenContent(const Token[] tokens, ref size_t index)
{
	string inner = tokens.content(index, TokenType.LParen, TokenType.RParen);
	return "(" ~ inner ~ ")";
}


/**
 * Renders a bracketed token group as text, including the surrounding square
 * brackets.
 * Params:
 *     tokens = the token array to examine
 *     index = an index into tokens that refers to a left bracket
 * See_also: content
 */
string bracketContent(const Token[] tokens, ref size_t index)
{
	string inner = tokens.content(index, TokenType.LBracket, TokenType.RBracket);
	return "[" ~ inner ~ "]";
}


/**
 * Advances index until it indexes a token directly after the balancing right
 * brace if index initially indexed a left brace, or advances index until it
 * indexes a token directly after the next semicolon otherwise.
 *
 * Does nothing when index is already at or beyond the end of tokens.
 * Params:
 *     tokens = the array of tokens
 *     index = the position of the first token of the statement; updated in
 *         place
 */
void skipBlockStatement(const Token[] tokens, ref size_t index)
{
	// Callers advance index before calling (e.g. past a keyword that was the
	// last token), so index can legitimately be out of range here.
	if (index >= tokens.length)
		return;

	if (tokens[index] == TokenType.LBrace)
		betweenBalancedBraces(tokens, index);
	else
		skipPastNext(tokens, TokenType.Semicolon, index);
}


/**
 * Moves index to the position directly after the next token whose type is
 * type. Brace, parenthesis and bracket groups met on the way are stepped over
 * as a whole, so a matching token nested inside such a group is not
 * considered. If no matching token is found, index ends up at or beyond the
 * end of tokens.
 * Params:
 *     tokens = the array of tokens
 *     type = the token type to look for
 *     index = the position to start searching from; updated in place
 */
void skipPastNext(const Token[] tokens, TokenType type, ref size_t index)
{
	while (index < tokens.length)
	{
		switch (tokens[index].type)
		{
		case TokenType.LBrace:
			tokens.betweenBalancedBraces(index);
			break;
		case TokenType.LParen:
			tokens.betweenBalancedParens(index);
			break;
		case TokenType.LBracket:
			tokens.betweenBalancedBrackets(index);
			break;
		default:
		{
			// Consume the token either way; stop only when it was the target
			immutable bool found = tokens[index].type == type;
			++index;
			if (found)
				return;
			break;
		}
		}
	}
}

/**
 * Builds the textual form of a type starting at tokens[index].
 *
 * The first token is always taken as the start of the type. After that the
 * type is extended with bracketed suffixes, template instantiations
 * introduced by '!' (either a parenthesised argument list or a single
 * token), and '*' / '&' tokens. Parsing stops at the first token that is
 * none of these.
 * Params:
 *     tokens = the array of tokens
 *     index = the position of the first token of the type. On return it
 *         refers to the first token that is not part of the type.
 * Returns: the concatenated text of the type
 */
string parseTypeDeclaration(const Token[] tokens, ref size_t index)
{
	auto type = tokens[index++].value.idup;
	buildingType: while (index < tokens.length)
	{
		switch (tokens[index].type)
		{
		case TokenType.LBracket:
			type ~= bracketContent(tokens, index);
			break;
		case TokenType.Not:
			type ~= tokens[index++].value;
			// A trailing '!' with nothing after it: stop instead of reading
			// past the end of the token array.
			if (index >= tokens.length)
				break buildingType;
			if (tokens[index] == TokenType.LParen)
				type ~= parenContent(tokens, index);
			else
				type ~= tokens[index++].value;
			break;
		case TokenType.Star:
		case TokenType.BitAnd:
			type ~= tokens[index++].value;
			break;
		default:
			break buildingType;
		}
	}
	return type;
}

/**
 * Parses a module from a token array.
 *
 * Walks the tokens once and records imports, unions, classes, interfaces,
 * structs, enums, functions and variables in a new Module. Bodies of version
 * blocks, extern/align blocks and protection blocks are parsed recursively and
 * merged into the result. Templates, unittests, aliases, mixins, asserts and
 * else branches are skipped.
 *
 * Truncated input is tolerated: every look-ahead checks that index is still
 * inside tokens.
 * Params:
 *     tokens = the tokens of the module (or of a block body) to parse
 *     protection = the default protection level for a block statement
 *     attributes = the default attributes for a block statement
 * Returns: the parsed module
 */
Module parseModule(const Token[] tokens, string protection = "public", string[] attributes = [])
{
	string type;
	string name;
	string localProtection = "";
	string[] localAttributes = [];
	size_t index = 0;

	// Forgets everything collected for the declaration that was just finished
	void resetLocals()
	{
		type = "";
		name = "";
		localProtection = "";
		localAttributes = [];
	}

	// True when index is in range and the token there has the given type
	bool currentIs(TokenType t)
	{
		return index < tokens.length && tokens[index] == t;
	}

	// Protection of the declaration being parsed: an explicit one wins over
	// the block default
	string effectiveProtection()
	{
		return localProtection.empty() ? protection : localProtection;
	}

	Module mod = new Module;
	while(index < tokens.length)
	{
		switch(tokens[index].type)
		{
		case TokenType.Else:
		case TokenType.Mixin:
		case TokenType.Assert:
			++index;
			if (index < tokens.length)
				tokens.skipBlockStatement(index);
			break;
		case TokenType.Alias:
			tokens.skipBlockStatement(index);
			break;
		case TokenType.Import:
			mod.imports ~= parseImports(tokens, index);
			resetLocals();
			break;
		case TokenType.Version:
			++index;
			if (currentIs(TokenType.LParen))
			{
				// version (X) { ... }: the condition is ignored and the body
				// is always merged
				tokens.betweenBalancedParens(index);
				if (currentIs(TokenType.LBrace))
					mod.merge(parseModule(betweenBalancedBraces(tokens, index),
						effectiveProtection(), attributes));
			}
			else if (currentIs(TokenType.Assign))
				tokens.skipBlockStatement(index); // version = X;
			break;
		case TokenType.Deprecated:
		case TokenType.Nothrow:
		case TokenType.Override:
		case TokenType.Synchronized:
		case TokenType.AtDisable:
		case TokenType.AtProperty:
		case TokenType.AtSafe:
		case TokenType.AtSystem:
		case TokenType.Abstract:
		case TokenType.Final:
		case TokenType.Gshared:
		case TokenType.Static:
			localAttributes ~= tokens[index++].value;
			break;
		case TokenType.Const:
		case TokenType.Immutable:
		case TokenType.Inout:
		case TokenType.Pure:
		case TokenType.Scope:
		case TokenType.Shared:
			auto tmp = tokens[index++].value;
			if (currentIs(TokenType.LParen))
				type = tmp ~ parenContent(tokens, index); // e.g. const(int)
			else if (currentIs(TokenType.Colon))
			{
				// "const:" applies to the rest of the block
				index++;
				attributes ~= tmp;
			}
			// NOTE(review): the qualifier is recorded as a local attribute in
			// all three cases, including the two handled above - confirm that
			// this duplication is intended.
			localAttributes ~= tmp;
			break;
		case TokenType.Align:
		case TokenType.Extern:
			string attribute = tokens[index++].value;
			if (currentIs(TokenType.LParen))
				attribute ~= parenContent(tokens, index);
			if (currentIs(TokenType.LBrace))
				mod.merge(parseModule(betweenBalancedBraces(tokens, index),
					effectiveProtection(), attributes ~ attribute));
			else if (currentIs(TokenType.Colon))
			{
				++index;
				attributes ~= attribute;
			}
			else
				localAttributes ~= attribute;
			break;
		case TokenType.PROTECTION_BEGIN: .. case TokenType.PROTECTION_END:
			string p = tokens[index++].value;
			if (currentIs(TokenType.Colon))
			{
				// "private:" changes the default for the rest of the block
				protection = p;
				++index;
			}
			else if (currentIs(TokenType.LBrace))
				mod.merge(parseModule(betweenBalancedBraces(tokens, index),
					p, attributes ~ localAttributes));
			else
				localProtection = p;
			break;
		case TokenType.Module:
			++index;
			while (index < tokens.length && tokens[index] != TokenType.Semicolon)
				mod.name ~= tokens[index++].value;
			++index; // the semicolon
			resetLocals();
			break;
		case TokenType.Union:
			mod.unions ~= parseUnion(tokens, index, effectiveProtection(),
				localAttributes ~ attributes);
			resetLocals();
			break;
		case TokenType.Class:
			mod.classes ~= parseClass(tokens, index, effectiveProtection(),
				localAttributes ~ attributes);
			resetLocals();
			break;
		case TokenType.Interface:
			mod.interfaces ~= parseInterface(tokens, index, effectiveProtection(),
				localAttributes ~ attributes);
			resetLocals();
			break;
		case TokenType.Struct:
			mod.structs ~= parseStruct(tokens, index, effectiveProtection(),
				localAttributes ~ attributes);
			resetLocals();
			break;
		case TokenType.Enum:
			mod.enums ~= parseEnum(tokens, index, effectiveProtection(),
				localAttributes ~ attributes);
			resetLocals();
			break;
		case TokenType.Template:
			++index; // template
			++index; // name
			if (currentIs(TokenType.LParen))
				tokens.betweenBalancedParens(index); // params
			if (currentIs(TokenType.LBrace))
				tokens.betweenBalancedBraces(index); // body
			resetLocals();
			break;
		case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END:
		case TokenType.Auto:
		case TokenType.Identifier:
			if (type.empty())
			{
				// First name-like token of a declaration: it starts the type
				type = tokens.parseTypeDeclaration(index);
			}
			else
			{
				// A type was already seen, so this token is the declared name
				name = tokens[index++].value;
				if (index >= tokens.length) break;
				if (tokens[index] == TokenType.LParen)
				{
					mod.functions ~= parseFunction(tokens, index, type, name,
						tokens[index].lineNumber,
						effectiveProtection(),
						attributes ~ localAttributes);
				}
				else
				{
					Variable v = new Variable;
					v.name = name;
					v.type = type;
					v.attributes = localAttributes ~ attributes;
					v.protection = effectiveProtection();
					v.line = tokens[index].lineNumber;
					mod.variables ~= v;
				}
				resetLocals();
			}
			break;
		case TokenType.Unittest:
			++index;
			if (currentIs(TokenType.LBrace))
				tokens.skipBlockStatement(index);
			resetLocals();
			break;
		case TokenType.Tilde:
			++index;
			if (currentIs(TokenType.This))
			{
				// Destructor: continue with the constructor handling below
				name = "~";
				goto case;
			}
			break;
		case TokenType.This:
			name ~= tokens[index++].value;
			if (currentIs(TokenType.LParen))
			{
				mod.functions ~= parseFunction(tokens, index, "", name,
					tokens[index - 1].lineNumber,
					effectiveProtection(),
					localAttributes ~ attributes);
			}
			resetLocals();
			break;
		default:
			++index;
			break;
		}
	}
	return mod;
}


/**
 * Parses an import statement
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the import keyword. On
 *         return it refers to the token directly after the statement.
 * Returns: only the module names that were imported, not which symbols were
 * selectively imported. For a renamed import (import io = std.stdio;) the
 * module name is returned, not the local alias.
 */
string[] parseImports(const Token[] tokens, ref size_t index)
{
	assert(tokens[index] == TokenType.Import);
	++index;
	auto app = appender!(string[])();
	string im;
	while (index < tokens.length)
	{
		switch(tokens[index].type)
		{
		case TokenType.Comma:
			++index;
			app.put(im);
			im = "";
			break;
		case TokenType.Assign:
			// Renamed import: what was collected so far is the local alias.
			// Drop it and keep reading; the module name follows the '='.
			++index;
			im = "";
			break;
		case TokenType.Semicolon:
			app.put(im);
			++index;
			return app.data;
		case TokenType.Colon:
			// Selective import: the module name is complete, the symbol list
			// up to the semicolon is skipped.
			app.put(im);
			tokens.skipBlockStatement(index);
			return app.data;
		default:
			im ~= tokens[index++].value;
			break;
		}
	}
	// Input ended before the terminating semicolon; keep what was read.
	if (!im.empty())
		app.put(im);
	return app.data;
}


/**
 * Parses an enum declaration
 *
 * The token after the enum keyword is taken as the name. An optional
 * ": type" gives the base type; without it the base type is reported as
 * "int", the default base type of a D enum. If a brace-delimited body
 * follows, every identifier that starts a member is recorded with its line
 * number; otherwise the rest of the statement is skipped.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the enum keyword. On return
 *         it refers to the token directly after the declaration.
 *     protection = accepted for symmetry with the other parse functions; this
 *         function does not store it
 *     attributes = accepted for symmetry with the other parse functions; this
 *         function does not store it
 * Returns: the parsed enum. On truncated input the fields read so far are
 *     filled in.
 */
Enum parseEnum(const Token[] tokens, ref size_t index, string protection,
	string[] attributes)
in
{
	assert (tokens[index] == TokenType.Enum);
}
body
{
	++index;
	Enum e = new Enum;
	if (index >= tokens.length) return e;

	e.line = tokens[index].lineNumber;
	e.name = tokens[index++].value;

	if (index < tokens.length && tokens[index] == TokenType.Colon)
	{
		++index;
		if (index < tokens.length)
			e.type = tokens[index++].value;
	}
	else
		e.type = "int";

	if (index >= tokens.length) return e;

	if (tokens[index] != TokenType.LBrace)
	{
		tokens.skipBlockStatement(index);
		return e;
	}

	auto r = betweenBalancedBraces(tokens, index);
	for (size_t i = 0; i < r.length;)
	{
		if (r[i].type == TokenType.Identifier)
		{
			EnumMember member;
			member.line = r[i].lineNumber;
			member.name = r[i].value;
			e.members ~= member;
			// Jump over an optional initializer to the next member
			r.skipPastNext(TokenType.Comma, i);
		}
		else
			++i;
	}
	return e;
}


/**
 * Parses a function declaration
 *
 * Parsing starts at the first parameter list. If a second parenthesised list
 * follows directly, the first one is treated as the template parameter list
 * and only the types of its entries are kept. Trailing attributes, an
 * optional template constraint, in/out/body contract blocks and the function
 * body (or the terminating semicolon) are consumed.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the left parenthesis of the
 *         first parameter list. On return it refers to the token directly
 *         after the declaration.
 *     type = the return type that was already parsed by the caller
 *     name = the function name that was already parsed by the caller
 *     line = the line number to record for the function
 *     protection = accepted for symmetry with the other parse functions; this
 *         function does not store it
 *     attributes = attributes collected by the caller; copied into the result
 * Returns: the parsed function
 */
Function parseFunction(const Token[] tokens, ref size_t index, string type,
	string name, uint line, string protection, string[] attributes)
in
{
	assert (tokens[index] == TokenType.LParen);
}
body
{
	Function f = new Function;
	f.name = name;
	f.returnType = type;
	f.line = line;
	f.attributes.insertInPlace(f.attributes.length, attributes);

	Variable[] vars1 = parseParameters(tokens, index);
	// The parameter list may have been the last thing in the input
	if (index < tokens.length && tokens[index] == TokenType.LParen)
	{
		f.templateParameters.insertInPlace(f.templateParameters.length,
			map!("a.type")(vars1));
		f.parameters.insertInPlace(f.parameters.length,
			parseParameters(tokens, index));
	}
	else
		f.parameters.insertInPlace(f.parameters.length, vars1);

	attributeLoop: while(index < tokens.length)
	{
		switch (tokens[index].type)
		{
		case TokenType.Immutable:
		case TokenType.Const:
		case TokenType.Pure:
		case TokenType.AtTrusted:
		case TokenType.AtProperty:
		case TokenType.Nothrow:
		case TokenType.Final:
		case TokenType.Override:
			f.attributes ~= tokens[index++].value;
			break;
		default:
			break attributeLoop;
		}
	}

	// The attribute loop can run to the end of the input
	if (index < tokens.length && tokens[index] == TokenType.If)
		f.constraint = parseConstraint(tokens, index);
	while (index < tokens.length &&
		(tokens[index] == TokenType.In || tokens[index] == TokenType.Out
		|| tokens[index] == TokenType.Body))
	{
		++index;
		if (index < tokens.length && tokens[index] == TokenType.LBrace)
			tokens.skipBlockStatement(index);
	}
	if (index >= tokens.length)
		return f;
	if (tokens[index] == TokenType.LBrace)
		tokens.skipBlockStatement(index);
	else if (tokens[index] == TokenType.Semicolon)
		++index;
	return f;
}

/**
 * Parses a template constraint of the form if (...).
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the if keyword, which must
 *         be followed by a left parenthesis. On return it refers to the token
 *         directly after the parenthesised condition.
 * Returns: the text "if " followed by the parenthesised condition
 */
string parseConstraint(const Token[] tokens, ref size_t index)
{
	assert(tokens[index] == TokenType.If);
	++index; // the if keyword; its text is re-created in the result below
	assert(tokens[index] == TokenType.LParen);
	return "if " ~ parenContent(tokens, index);
}

/**
 * Parses a parenthesised parameter list.
 *
 * For every parameter the storage classes / qualifiers, the type and, when
 * present, the name and its line number are recorded. Default values and
 * template specialisations (everything after ':' or after the name, up to
 * the next comma) are skipped.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the left parenthesis of the
 *         list. On return it refers to the token directly after the matching
 *         right parenthesis.
 * Returns: one Variable per parameter, in declaration order
 */
Variable[] parseParameters(const Token[] tokens, ref size_t index)
in
{
	assert (tokens[index] == TokenType.LParen);
}
body
{
	auto params = appender!(Variable[])();
	Variable v = new Variable;
	auto r = betweenBalancedParens(tokens, index);
	size_t i = 0;
	while (i < r.length)
	{
		switch(r[i].type)
		{
		case TokenType.In:
		case TokenType.Out:
		case TokenType.Ref:
		case TokenType.Scope:
		case TokenType.Lazy:
		case TokenType.Const:
		case TokenType.Immutable:
		case TokenType.Shared:
		case TokenType.Inout:
			auto tmp = r[i++].value;
			// The qualifier may be the last token of the list, so check the
			// bounds before looking at what follows it.
			if (i < r.length && r[i] == TokenType.LParen)
				v.type ~= tmp ~ parenContent(r, i); // e.g. const(char)
			else
				v.attributes ~= tmp;
			break;
		case TokenType.Colon:
			// Template specialisation: skip it and finish the parameter
			i++;
			r.skipPastNext(TokenType.Comma, i);
			params.put(v);
			v = new Variable;
			break;
		case TokenType.Comma:
			++i;
			params.put(v);
			v = new Variable;
			break;
		default:
			if (v.type.empty())
			{
				v.type = r.parseTypeDeclaration(i);
				// A type-only parameter at the very end has no comma after it
				if (i >= r.length)
					params.put(v);
			}
			else
			{
				v.line = r[i].lineNumber;
				v.name = r[i++].value;
				params.put(v);
				// v is a class reference, so this still updates the entry
				// that was just stored
				if (i < r.length && r[i] == TokenType.Vararg)
				{
					v.type ~= " ...";
				}
				v = new Variable;
				r.skipPastNext(TokenType.Comma, i);
			}
			break;
		}
	}
	return params.data;
}

/**
 * Parses the comma separated list of base classes / interfaces that follows
 * the colon in a class or interface declaration.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the colon. On return it
 *         refers to the first token that is not part of the list.
 * Returns: the text of each base type, in declaration order
 */
string[] parseBaseClassList(const Token[] tokens, ref size_t index)
in
{
	assert(tokens[index] == TokenType.Colon);
}
body
{
	auto bases = appender!(string[])();
	++index;
	while (index < tokens.length && tokens[index] == TokenType.Identifier)
	{
		bases.put(parseTypeDeclaration(tokens, index));
		// parseTypeDeclaration can consume the rest of the input, so check
		// the bounds before looking for the separating comma.
		if (index >= tokens.length || tokens[index] != TokenType.Comma)
			break;
		++index;
	}
	return bases.data;
}

/**
 * Parses a brace-delimited aggregate body and stores the result in st.
 *
 * The start indices of the token at tokens[index] on entry and of the last
 * token consumed are recorded as bodyStart and bodyEnd. The functions and
 * variables found in the body are inserted at the front of st.functions and
 * st.variables.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the left brace of the body.
 *         On return it refers to the token directly after the body.
 *     st = the aggregate that receives the parsed members
 */
void parseStructBody(const Token[] tokens, ref size_t index, Struct st)
{
	st.bodyStart = tokens[index].startIndex;
	const(Token)[] inner = tokens.betweenBalancedBraces(index);
	Module members = parseModule(inner);
	st.bodyEnd = tokens[index - 1].startIndex;
	st.functions.insertInPlace(0, members.functions);
	st.variables.insertInPlace(0, members.variables);
}


/**
 * Parses the part of a struct or union declaration that follows the keyword:
 * the name, optional template parameters, an optional template constraint
 * and the body.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the name of the aggregate.
 *         On return it refers to the token directly after the declaration.
 *     protection = the protection level to record
 *     attributes = the attributes to record
 * Returns: the parsed aggregate. On truncated input the fields read so far
 *     are filled in.
 */
Struct parseStructOrUnion(const Token[] tokens, ref size_t index, string protection,
	string[] attributes)
{
	Struct s = new Struct;
	s.attributes = attributes;
	s.protection = protection;

	// The keyword may have been the last token of the input
	if (index >= tokens.length) return s;

	s.line = tokens[index].lineNumber;
	s.name = tokens[index++].value;

	if (index >= tokens.length) return s;

	if (tokens[index] == TokenType.LParen)
		s.templateParameters.insertInPlace(s.templateParameters.length,
			map!("a.type")(parseParameters(tokens, index)));

	if (index >= tokens.length) return s;

	if (tokens[index] == TokenType.If)
		s.constraint = parseConstraint(tokens, index);

	if (index >= tokens.length) return s;

	if (tokens[index] == TokenType.LBrace)
		parseStructBody(tokens, index, s);
	else
		tokens.skipBlockStatement(index);
	return s;
}

/**
 * Parses a struct declaration.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the struct keyword
 *     protection = the protection level to record
 *     attributes = the attributes to record
 * See_also: parseStructOrUnion
 */
Struct parseStruct(const Token[] tokens, ref size_t index, string protection,
	string[] attributes)
in
{
	assert(tokens[index] == TokenType.Struct);
}
body
{
	++index; // step over the struct keyword
	return tokens.parseStructOrUnion(index, protection, attributes);
}

/**
 * Parses a union declaration.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the union keyword
 *     protection = the protection level to record
 *     attributes = the attributes to record
 * See_also: parseStructOrUnion
 */
Struct parseUnion(const Token[] tokens, ref size_t index, string protection,
	string[] attributes)
in
{
	assert(tokens[index] == TokenType.Union);
}
body
{
	++index; // step over the union keyword
	return tokens.parseStructOrUnion(index, protection, attributes);
}

/**
 * Parses the part of a class or interface declaration that follows the
 * keyword: the name, optional template parameters, an optional template
 * constraint, an optional base class list and the body.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the name of the aggregate.
 *         On return it refers to the token directly after the declaration.
 *     protection = the protection level to record
 *     attributes = the attributes to record
 * Returns: the parsed aggregate. On truncated input the fields read so far
 *     are filled in.
 */
Inherits parseInherits(const Token[] tokens, ref size_t index, string protection,
	string[] attributes)
{
	auto i = new Inherits;
	i.protection = protection;
	i.attributes.insertInPlace(i.attributes.length, attributes);

	// The keyword may have been the last token of the input
	if (index >= tokens.length) return i;

	i.line = tokens[index].lineNumber;
	i.name = tokens[index++].value;

	if (index >= tokens.length) return i;

	if (tokens[index] == TokenType.LParen)
		i.templateParameters.insertInPlace(i.templateParameters.length,
			map!("a.type")(parseParameters(tokens, index)));

	if (index >= tokens.length) return i;

	if (tokens[index] == TokenType.If)
		i.constraint = parseConstraint(tokens, index);

	if (index >= tokens.length) return i;

	if (tokens[index] == TokenType.Colon)
		i.baseClasses = parseBaseClassList(tokens, index);

	if (index >= tokens.length) return i;

	if (tokens[index] == TokenType.LBrace)
		parseStructBody(tokens, index, i);
	else
		tokens.skipBlockStatement(index);
	return i;
}

/**
 * Parses an interface declaration.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the interface keyword
 *     protection = the protection level to record
 *     attributes = the attributes to record
 * See_also: parseInherits
 */
Inherits parseInterface(const Token[] tokens, ref size_t index, string protection,
	string[] attributes)
in
{
	assert (tokens[index] == TokenType.Interface);
}
body
{
	++index; // step over the interface keyword
	return tokens.parseInherits(index, protection, attributes);
}


/**
 * Parses a class declaration.
 * Params:
 *     tokens = the array of tokens
 *     index = an index into tokens that refers to the class keyword
 *     protection = the protection level to record
 *     attributes = the attributes to record
 * See_also: parseInherits
 */
Inherits parseClass(const Token[] tokens, ref size_t index, string protection,
	string[] attributes)
in
{
	assert(tokens[index] == TokenType.Class);
}
body
{
	++index; // step over the class keyword
	return tokens.parseInherits(index, protection, attributes);
}