From 9d5a8ce7e6cd0dd6db69864815d98d968c8dcccc Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Sat, 21 Apr 2012 06:10:41 -0700 Subject: [PATCH] Checking in the code --- README.md | 231 +++++++++++++- autocomplete.d | 281 +++++++++++++++++ build.sh | 2 + codegen.d | 104 +++++++ highlighter.d | 84 +++++ langutils.d | 427 +++++++++++++++++++++++++ main.d | 199 ++++++++++++ parser.d | 830 +++++++++++++++++++++++++++++++++++++++++++++++++ tokenizer.d | 552 ++++++++++++++++++++++++++++++++ types.d | 567 +++++++++++++++++++++++++++++++++ 10 files changed, 3274 insertions(+), 3 deletions(-) create mode 100644 autocomplete.d create mode 100755 build.sh create mode 100644 codegen.d create mode 100644 highlighter.d create mode 100644 langutils.d create mode 100644 main.d create mode 100644 parser.d create mode 100644 tokenizer.d create mode 100644 types.d diff --git a/README.md b/README.md index f1191e8..dc140d5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,229 @@ -Dscanner -======== +# Overview +Dscanner is a tool used to analyze D source code. + +### Options +* **--dotComplete** _sourceFile_ _cursorPosition_ - Provide autocompletion for the +insertion of the dot operator. The cursor position is the character position in +the **file**, not the position in the line. +* **--sloc** _sourceFiles_ - count the number of logical lines of code in the given +source files. +* **--json** _sourceFile_ - Generate a JSON summary of the given source file +* **--parenComplete** _sourceFile_ _cursorPosition_ - Provides a listing of function +parameters or pre-defined version identifiers at the cursor position. The cursor +position is the character position in the **file**, not the line. +* **--highlight** _sourceFile_ - Syntax-highlight the given source file. The +resulting HTML will be written to standard output. +* **-I** _includePath_ - Include _includePath_ in the list of paths used to search +for imports. 
By default dscanner will search in the current working directory as +well as any paths specified in /etc/dmd.conf. + +# Dot Completion + +# Paren Completion + +# JSON output +Generates a JSON summary of the input file. + +### Example +The given D code: + module example; + + import std.stdio; + + interface Iface { + double interfaceMethod(); + } + + class SomeClass(T) if (isSomeString!T) : IFace { + public: + this() {} + void doStuff(T); + override double interfaceMethod() {} + private: + T theTee; + } + + int freeFunction(int x) { return x + x; } + + void main(string[] args) { + + } + +is transformed into the following JSON markup: + + { + "name" : "example", + "imports" : [ + "std.stdio" + ], + "interfaces" : [ + { + "name" : "Iface", + "line" : 5, + "protection" : "public", + "attributes" : [ + ], + "constraint" : "", + "templateParameters" : [ + ], + "functions" : [ + { + "name" : "interfaceMethod", + "line" : 6, + "protection" : "", + "attributes" : [ + ], + "constraint" : "", + "templateParameters" : [ + ], + "parameters" : [ + ], + "returnType" : "double" + } + ], + "variables" : [ + ], + "baseClasses" : [ + ] + } + ], + "classes" : [ + { + "name" : "SomeClass", + "line" : 9, + "protection" : "public", + "attributes" : [ + ], + "constraint" : "if (isSomeString!T)", + "templateParameters" : [ + "T" + ], + "functions" : [ + { + "name" : "this", + "line" : 11, + "protection" : "", + "attributes" : [ + ], + "constraint" : "", + "templateParameters" : [ + ], + "parameters" : [ + ], + "returnType" : "" + }, + { + "name" : "doStuff", + "line" : 12, + "protection" : "", + "attributes" : [ + ], + "constraint" : "", + "templateParameters" : [ + ], + "parameters" : [ + { + "name" : "", + "line" : 0, + "protection" : "", + "attributes" : [ + ], + "type" : "T" + } + ], + "returnType" : "void" + }, + { + "name" : "interfaceMethod", + "line" : 13, + "protection" : "", + "attributes" : [ + "override" + ], + "constraint" : "", + "templateParameters" : [ + ], + "parameters" : 
[ + ], + "returnType" : "double" + } + ], + "variables" : [ + { + "name" : "theTee", + "line" : 15, + "protection" : "private", + "attributes" : [ + ], + "type" : "T" + } + ], + "baseClasses" : [ + "IFace" + ] + } + ], + "structs" : [ + ], + "structs" : [ + ], + "functions" : [ + { + "name" : "freeFunction", + "line" : 18, + "protection" : "", + "attributes" : [ + ], + "constraint" : "", + "templateParameters" : [ + ], + "parameters" : [ + { + "name" : "x", + "line" : 18, + "protection" : "", + "attributes" : [ + ], + "type" : "int" + } + ], + "returnType" : "int" + }, + { + "name" : "main", + "line" : 20, + "protection" : "", + "attributes" : [ + ], + "constraint" : "", + "templateParameters" : [ + ], + "parameters" : [ + { + "name" : "args", + "line" : 20, + "protection" : "", + "attributes" : [ + ], + "type" : "string[]" + } + ], + "returnType" : "void" + } + ], + "variables" : [ + ], + "enums" : [ + ] + } + +# Line of Code count +This option counts the logical lines of code in the given source files, not +simply the physical lines. More specifically, it counts the number of +semicolons, **if**, **while**, **case**, **foreach**, and **for** tokens in the +given files. + +# Highlighting +Syntax highlights the given file in HTML format. Output is written to _stdout_. +The CSS styling information is currently hard-coded. 
-Swiss-army knife for D source code \ No newline at end of file diff --git a/autocomplete.d b/autocomplete.d new file mode 100644 index 0000000..d601c1a --- /dev/null +++ b/autocomplete.d @@ -0,0 +1,281 @@ +/******************************************************************************* + * The MIT License + * + * Copyright (c) 2012 Brian Schott (Sir Alaran) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ ******************************************************************************/ + +import std.range; +import std.algorithm; +import std.array; +import std.conv; +import std.stdio; +import std.typecons; + +import parser; +import langutils; +import types; +import tokenizer; + +immutable string[] versions = ["AIX", "all", "Alpha", "ARM", "BigEndian", "BSD", + "Cygwin", "D_Coverage", "D_Ddoc", "DigitalMars", "D_InlineAsm_X86", + "D_InlineAsm_X86_64", "D_LP64", "D_NET", "D_PIC", "D_Version2", + "FreeBSD", "GNU", "HPPA", "HPPA64", "Hurd", "IA64", "LDC", "linux", + "LittleEndian", "MinGW", "MIPS", "MIPS64", "none", "OpenBSD", "OSX", + "Posix", "PPC", "PPC64", "S390", "S390X", "SDC", "SH", "SH64", "SkyOS", + "Solaris", "SPARC", "SPARC64", "SysV3", "SysV4", "unittest", "Win32", + "Win64", "Windows", "X86", "X86_64" +]; + +string[] callChainBackwards(const Token[] tokens, size_t index) +{ + if (index == 0) + return [tokens[index].value]; + string[] callChain; + string current; + loop: while(true) + { + switch(tokens[index].type) + { + case TokenType.tThis: + case TokenType.identifier: + case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END: + current = tokens[index].value ~ current; + callChain = current ~ callChain; + current = ""; + if (index == 0) + break loop; + else + --index; + if (tokens[index] == TokenType.not) + callChain = callChain[1 .. $]; + break; + case TokenType.rBracket: + tokens.skipBrackets(index); + current ~= "[]"; + break; + case TokenType.rParen: + tokens.skipParens(index); + break; + case TokenType.not: + case TokenType.dot: + if (index == 0) + break loop; + else + --index; + break; + default: + break loop; + } + } + return callChain; +} + + +string[] callChainForwards(const Token[] tokens, size_t index) +{ + string[] callChain; + while (index < tokens.length) + { + switch(tokens[index].type) + { + case TokenType.tNew: + ++index; + break; + case TokenType.tThis: + case TokenType.identifier: + case TokenType.TYPES_BEGIN: .. 
case TokenType.TYPES_END: + callChain ~= tokens[index++].value; + break; + case TokenType.lParen: + tokens.skipParens(index); + break; + case TokenType.lBracket: + tokens.skipBrackets(index); + callChain[$ - 1] ~= "[i]"; + break; + case TokenType.not: + ++index; + if (tokens.startsWith(TokenType.lParen)) + tokens.skipParens(index); + else + ++index; + break; + default: + break; + } + if (index >= tokens.length || tokens[index] != TokenType.dot) + break; + else + ++index; + } + return callChain; +} + + +struct AutoComplete +{ + this(const (Token)[] tokens, CompletionContext context) + { + this.tokens = tokens; + this.context = context; + } + + string getTypeOfExpression(string[] chain, const Token[] tokens, size_t cursor) + { + if (chain.length == 0) + return "void"; + auto type = typeOfVariable(chain[0], cursor); + if (type == "void") + return type; + chain = chain[1 .. $]; + while (chain.length >= 1) + { + auto typeMap = context.getMembersOfType(type); + if (typeMap is null) + return "void"; + auto memberType = typeMap[chain[0]][0]; + if (memberType is null) + return "void"; + type = memberType; + chain = chain[1 .. $]; + } + return type; + } + + /** + * This is where the magic happens + */ + string typeOfVariable(string symbol, size_t cursor) + { + // int is of type int, double of type double, and so on + if (symbol in typeProperties) + return symbol; + + if (context.getMembersOfType(symbol)) + return symbol; + + // Arbitrarily define the depth of the cursor position as zero + // iterate backwards through the code to try to find the variable + int depth = 0; + auto preceedingTokens = assumeSorted(tokens).lowerBound(cursor); + auto index = preceedingTokens.length - 1; + while (true) + { + if (preceedingTokens[index] == TokenType.lBrace) + --depth; + else if (preceedingTokens[index] == TokenType.rBrace) + ++depth; + else if (depth <= 0 && preceedingTokens[index].value == symbol) + { + // Found the symbol, now determine if it was declared here. 
+ auto p = preceedingTokens[index - 1]; + if ((p == TokenType.tAuto || p == TokenType.tImmutable + || p == TokenType.tConst) + && preceedingTokens[index + 1] == TokenType.assign) + { + auto chain = callChainForwards(tokens, index + 2); + return getTypeOfExpression(chain, tokens, cursor); + } + if (p == TokenType.identifier + || (p.type > TokenType.TYPES_BEGIN + && p.type < TokenType.TYPES_END)) + { + return preceedingTokens[index - 1].value; + } + } + if (index == 0) + break; + else + --index; + } + + // Find all struct or class bodies that we're in. + // Check for the symbol in those class/struct/interface bodies + // if match is found, return it + auto structs = context.getStructsContaining(cursor); + if (symbol == "this" && structs.length > 0) + return minCount!("a.bodyStart > b.bodyStart")(structs)[0].name; + foreach (s; structs) + { + auto t = s.getMemberType(symbol); + if (t !is null) + return t; + } + return "void"; + } + + string symbolAt(size_t cursor) const + { + auto r = assumeSorted(tokens).lowerBound(cursor)[$ - 1]; + if (r.value.length + r.startIndex > cursor) + return r.value; + else + return null; + } + + string parenComplete(size_t cursor) + { + auto index = assumeSorted(tokens).lowerBound(cursor).length; + if (index > 2) + index -= 2; + else + return []; + if (tokens[index] == TokenType.tVersion) + { + return to!string(array(join(map!`a ~ "?1"`(versions), " "))); + } + return ""; + } + + string dotComplete(size_t cursor) + { + auto index = assumeSorted(tokens).lowerBound(cursor).length; + if (index > 2) + index -= 2; + else + return ""; + auto t = tokens[index]; + string[] chain = callChainBackwards(tokens, index); + auto type = getTypeOfExpression(chain, tokens, cursor); + + if (type && type in typeProperties) + { + string r; + foreach (i, prop; typeProperties[type]) + if (i == typeProperties.length) + r = r ~ prop; + else + r = r ~ prop ~ " "; + return r; + } + + const Tuple!(string, string)[string] typeMap = context.getMembersOfType(type); + if 
(typeMap is null) + return ""; + auto app = appender!(string[])(); + foreach (k, t; typeMap) + app.put(k ~ t[1]); + return to!string(array(join(sort(app.data), " "))); + } + + const(Token)[] tokens; + CompletionContext context; +} diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..8c2fc91 --- /dev/null +++ b/build.sh @@ -0,0 +1,2 @@ +dmd *.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -D +#dmd *.d -g -unittest -m64 -w -wi -property -oftokenizer diff --git a/codegen.d b/codegen.d new file mode 100644 index 0000000..a8d2df3 --- /dev/null +++ b/codegen.d @@ -0,0 +1,104 @@ +module codegen; + +import std.range; + + +class Trie(K, V) if (isInputRange!K): TrieNode!(K, V) +{ + /** + * Adds the given value to the trie with the given key + */ + void add(K key, V value) pure + { + TrieNode!(K,V) current = this; + foreach(keyPart; key) + { + if ((keyPart in current.children) is null) + { + auto node = new TrieNode!(K, V); + current.children[keyPart] = node; + current = node; + } + else + current = current.children[keyPart]; + } + current.value = value; + } +} + +class TrieNode(K, V) if (isInputRange!K) +{ + V value; + TrieNode!(K,V)[ElementType!K] children; +} + +string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString) +{ + string caseStatement = ""; + foreach(dchar k, TrieNode!(K,V) v; node.children) + { + caseStatement ~= indentString; + caseStatement ~= "case '"; + caseStatement ~= k; + caseStatement ~= "':\n"; + caseStatement ~= indentString; + caseStatement ~= "\tcurrentToken.value ~= '"; + caseStatement ~= k; + caseStatement ~= "';\n"; + caseStatement ~= indentString; + caseStatement ~= "\tcurrentToken.lineNumber = lineNumber;"; + caseStatement ~= indentString; + caseStatement ~= "\t++endIndex;\n"; + if (v.children.length > 0) + { + caseStatement ~= indentString; + caseStatement ~= "\tif (endIndex >= inputString.length)\n"; + caseStatement ~= indentString; + caseStatement ~= "\t{\n"; + caseStatement ~= indentString; + 
caseStatement ~= "\t\tcurrentToken.type = " ~ node.children[k].value; + caseStatement ~= ";\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\tbreak;\n"; + caseStatement ~= indentString; + caseStatement ~= "\t}\n"; + caseStatement ~= indentString; + caseStatement ~= "\tswitch (inputString[endIndex])\n"; + caseStatement ~= indentString; + caseStatement ~= "\t{\n"; + caseStatement ~= printCaseStatements(v, indentString ~ "\t"); + caseStatement ~= indentString; + caseStatement ~= "\tdefault:\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\tcurrentToken.type = "; + caseStatement ~= v.value; + caseStatement ~= ";\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\tbreak;\n"; + caseStatement ~= indentString; + caseStatement ~= "\t}\n"; + caseStatement ~= indentString; + caseStatement ~= "\tbreak;\n"; + } + else + { + caseStatement ~= indentString; + caseStatement ~= "\tcurrentToken.type = "; + caseStatement ~= v.value; + caseStatement ~= ";\n"; + caseStatement ~= indentString; + caseStatement ~= "\tbreak;\n"; + } + } + return caseStatement; +} + +string generateCaseTrie(string[] args ...) 
+{ + auto t = new Trie!(string, string); + for(int i = 0; i < args.length; i+=2) + { + t.add(args[i], args[i+1]); + } + return printCaseStatements(t, ""); +} diff --git a/highlighter.d b/highlighter.d new file mode 100644 index 0000000..872aa0c --- /dev/null +++ b/highlighter.d @@ -0,0 +1,84 @@ +/******************************************************************************* + * The MIT License + * + * Copyright (c) 2012 Brian Schott (Sir Alaran) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + ******************************************************************************/ + +import std.stdio; +import langutils; +import std.array; + +void writeSpan(string cssClass, string value) +{ + stdout.write(``, value.replace("<", "<"), ``); +} + +void highlight(Token[] tokens) +{ + stdout.writeln(q"[ + + + + + +
]");
+
+	foreach (Token t; tokens) // emit every token in input order, styled according to its TokenType
+	{
+		switch (t.type)
+		{
+		case TokenType.KEYWORDS_BEGIN: .. case TokenType.KEYWORDS_END: // case range bounded by the keyword sentinels
+			writeSpan("keyword", t.value);
+			break;
+		case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END: // case range bounded by the type sentinels
+			writeSpan("type", t.value);
+			break;
+		case TokenType.comment:
+			writeSpan("comment", t.value);
+			break;
+		case TokenType.stringLiteral:
+			writeSpan("string", t.value);
+			break;
+		case TokenType.numberLiteral:
+			writeSpan("number", t.value);
+			break;
+		case TokenType.OPERATORS_BEGIN: .. case TokenType.OPERATORS_END: // case range bounded by the operator sentinels
+			writeSpan("operator", t.value);
+			break;
+		case TokenType.PROPERTIES_BEGIN: .. case TokenType.PROPERTIES_END: // case range bounded by the property sentinels
+			writeSpan("property", t.value);
+			break;
+		default: // every other token type is written without a span, so it must be HTML-escaped here
+			stdout.write(t.value.replace("&", "&amp;").replace("<", "&lt;")); // '&' first, otherwise the '&' of "&lt;" would be escaped again
+			break;
+		}
+	}
+	stdout.writeln("
\n"); +} diff --git a/langutils.d b/langutils.d new file mode 100644 index 0000000..44657c8 --- /dev/null +++ b/langutils.d @@ -0,0 +1,427 @@ +/******************************************************************************* + * The MIT License + * + * Copyright (c) 2012 Brian Schott (Sir Alaran) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ ******************************************************************************/ + +module langutils; + +/** + * Returns: true if input is a access attribute + */ +pure nothrow bool isAccessAttribute(TokenType input) +{ + return input > TokenType.PROTECTION_BEGIN && input < TokenType.PROTECTION_END; +} + +/** + * See_also: isAttribute(TokenType) + */ +pure nothrow bool isAttribute(ref const Token token) +{ + return isAttribute(token.type); +} + +/** + * Returns: true if the given token type is an attribute, false otherwise + */ +pure nothrow bool isAttribute(TokenType input) +{ + if (isAccessAttribute(input)) + return true; + return input > TokenType.ATTRIBUTES_BEGIN && input < TokenType.ATTRIBUTES_END; +} + +/** + * Returns: the token type for the given string. Defaults to "identifier" + */ +pure nothrow TokenType lookupTokenType(const string input) +{ + immutable(TokenType)* type = input in tokenLookup; + if (type !is null) + return *type; + else + return TokenType.identifier; +} + + +/** + * Listing of all the tokens in the D language + */ +enum TokenType: uint +{ +// Operators + OPERATORS_BEGIN, + div, /// / + divEquals, /// /= + dot, /// . + slice, // .. + vararg, /// ... + bitAnd, /// & + bitAndEquals, /// &= + lAnd, /// && + bitOr, /// | + bitOrEquals, /// |= + lOr, /// || + minus, /// - + minusEquals, /// -= + uMinus, /// -- + plus, /// + + plusEquals, /// += + uPlus, /// ++ + less, /// < + lessEqual, /// <= + shiftLeft, /// << + shiftLeftEqual, /// <<= + lessOrGreater, /// <> + lessEqualGreater, // <>= + greater, /// > + greaterEqual, /// >= + shiftRightEqual, /// >>= + unsignedShiftRightEqual, /// >>>= + shiftRight, /// >> + unsignedShiftRight, /// >>> + not, /// ! 
+ notEquals, /// != + notLessEqualGreater, /// !<> + unordered, /// !<>= + notLess, /// !< + notLessEqual, /// !<= + notGreater, /// !> + notGreaterEqual, /// !>= + lParen, /// $(LPAREN) + rParen, /// $(RPAREN) + lBracket, /// [ + rBracket, /// ] + lBrace, /// { + rBrace, /// } + ternary, /// ? + comma, /// , + semicolon, /// ; + colon, /// : + dollar, /// $ + assign, /// = + equals, /// == + star, /// * + mulEquals, /// *= + mod, /// % + modEquals, /// %= + xor, /// ^ + xorEquals, /// ^= + pow, /// ^^ + powEquals, /// ^^= + tilde, /// ~ + catEquals, /// ~= + hash, // # + goesTo, // => + OPERATORS_END, + +// Types + TYPES_BEGIN, + tString, /// string + tBool, /// bool, + tByte, /// byte, + tCdouble, /// cdouble, + tCent, /// cent, + tCfloat, /// cfloat, + tChar, /// char, + tCreal, /// creal, + tDchar, /// dchar, + tDouble, /// double, + tFloat, /// float, + tUbyte, /// ubyte, + tUcent, /// ucent, + tUint, /// uint, + tUlong, /// ulong, + tShort, /// short, + tReal, /// real, + tLong, /// long, + tInt, /// int, + tFunction, /// function, + tIdouble, /// idouble, + tIreal, /// ireal, + tWchar, /// wchar, + tVoid, /// void, + tUshort, /// ushort, + tIfloat, /// ifloat, + TYPES_END, + tTemplate, /// template, + +// Keywords + KEYWORDS_BEGIN, + ATTRIBUTES_BEGIN, + tExtern, /// extern, + tAlign, /// align, + tPragma, /// pragma, + tDeprecated, /// deprecated, + PROTECTION_BEGIN, + tPackage, /// package, + tPrivate, /// private, + tProtected, /// protected, + tPublic, /// public, + tExport, /// export, + PROTECTION_END, + tStatic, /// static, + tSynchronized, /// synchronized, + tFinal, /// final + tAbstract, /// abstract, + tConst, /// const, + tAuto, /// auto, + tScope, /// scope, + t__gshared, /// __gshared, + tShared, // shared, + tImmutable, // immutable, + tInout, // inout, + atDisable, /// @disable + ATTRIBUTES_END, + tAlias, /// alias, + tAsm, /// asm, + tAssert, /// assert, + tBody, /// body, + tBreak, /// break, + tCase, /// case, + tCast, /// cast, + tCatch, 
/// catch, + tClass, /// class, + tContinue, /// continue, + tDebug, /// debug, + tDefault, /// default, + tDelegate, /// delegate, + tDelete, /// delete, + tDo, /// do, + tElse, /// else, + tEnum, /// enum, + tFalse, /// false, + tFinally, /// finally, + tFor, /// for, + tForeach, /// foreach, + tForeach_reverse, /// foreach_reverse, + tGoto, /// goto, + tIf, /// if, + tImport, /// import, + tIn, /// in, + tInterface, /// interface, + tInvariant, /// invariant, + tIs, /// is, + tLazy, /// lazy, + tMacro, /// macro, + tMixin, /// mixin, + tModule, /// module, + tNew, /// new, + tNothrow, /// nothrow, + tNull, /// null, + tOut, /// out, + tOverride, /// override, + tPure, /// pure, + tRef, /// ref, + tReturn, /// return, + tStruct, /// struct, + tSuper, /// super, + tSwitch, /// switch, + tThis, /// this, + tThrow, /// throw, + tTrue, /// true, + tTry, /// try, + tTypedef, /// typedef, + tTypeid, /// typeid, + tTypeof, /// typeof, + tUnion, /// union, + tUnittest, /// unittest, + tVersion, /// version, + tVolatile, /// volatile, + tWhile, /// while, + tWith, /// with, + KEYWORDS_END, + +// Constants + CONSTANTS_BEGIN, + t__FILE__, /// __FILE__, + t__LINE__, /// __LINE__, + + t__thread, /// __thread, + t__traits, /// __traits, + CONSTANTS_END, + +// Properties + PROPERTIES_BEGIN, + + atProperty, /// @property + atSafe, /// @safe + atSystem, /// @system + atTrusted, /// @trusted + PROPERTIES_END, + +// Misc + MISC_BEGIN, + comment, /// /** comment */ or // comment or ///comment + stringLiteral, /// "a string" + numberLiteral, /// int, float, etc... 
+ identifier, + whitespace, /// whitespace + blank, + MISC_END, +} + + +/** + * lookup table for converting strings to tokens + */ +immutable TokenType[string] tokenLookup; + + +static this() +{ + tokenLookup = [ + "abstract" : TokenType.tAbstract, + "alias" : TokenType.tAlias, + "align" : TokenType.tAlign, + "asm" : TokenType.tAsm, + "assert" : TokenType.tAssert, + "auto" : TokenType.tAuto, + "body" : TokenType.tBody, + "bool" : TokenType.tBool, + "break" : TokenType.tBreak, + "byte" : TokenType.tByte, + "case" : TokenType.tCase, + "cast" : TokenType.tCast, + "catch" : TokenType.tCatch, + "cdouble" : TokenType.tCdouble, + "cent" : TokenType.tCent, + "cfloat" : TokenType.tCfloat, + "char" : TokenType.tChar, + "class" : TokenType.tClass, + "const" : TokenType.tConst, + "continue" : TokenType.tContinue, + "creal" : TokenType.tCreal, + "dchar" : TokenType.tDchar, + "debug" : TokenType.tDebug, + "default" : TokenType.tDefault, + "delegate" : TokenType.tDelegate, + "delete" : TokenType.tDelete, + "deprecated" : TokenType.tDeprecated, + "do" : TokenType.tDo, + "double" : TokenType.tDouble, + "else" : TokenType.tElse, + "enum" : TokenType.tEnum, + "export" : TokenType.tExport, + "extern" : TokenType.tExtern, + "false" : TokenType.tFalse, + "final" : TokenType.tFinal, + "finally" : TokenType.tFinally, + "float" : TokenType.tFloat, + "for" : TokenType.tFor, + "foreach" : TokenType.tForeach, + "foreach_reverse" : TokenType.tForeach_reverse, + "function" : TokenType.tFunction, + "goto" : TokenType.tGoto, + "idouble" : TokenType.tIdouble, + "if" : TokenType.tIf, + "ifloat" : TokenType.tIfloat, + "immutable" : TokenType.tImmutable, + "import" : TokenType.tImport, + "in" : TokenType.tIn, + "inout" : TokenType.tInout, + "int" : TokenType.tInt, + "interface" : TokenType.tInterface, + "invariant" : TokenType.tInvariant, + "ireal" : TokenType.tIreal, + "is" : TokenType.tIs, + "lazy" : TokenType.tLazy, + "long" : TokenType.tLong, + "macro" : TokenType.tMacro, + "mixin" : 
TokenType.tMixin, + "module" : TokenType.tModule, + "new" : TokenType.tNew, + "nothrow" : TokenType.tNothrow, + "null" : TokenType.tNull, + "out" : TokenType.tOut, + "override" : TokenType.tOverride, + "package" : TokenType.tPackage, + "pragma" : TokenType.tPragma, + "private" : TokenType.tPrivate, + "protected" : TokenType.tProtected, + "public" : TokenType.tPublic, + "pure" : TokenType.tPure, + "real" : TokenType.tReal, + "ref" : TokenType.tRef, + "return" : TokenType.tReturn, + "scope" : TokenType.tScope, + "shared" : TokenType.tShared, + "short" : TokenType.tShort, + "static" : TokenType.tStatic, + "struct" : TokenType.tStruct, + "string" : TokenType.tString, + "super" : TokenType.tSuper, + "switch" : TokenType.tSwitch, + "synchronized" : TokenType.tSynchronized, + "template" : TokenType.tTemplate, + "this" : TokenType.tThis, + "throw" : TokenType.tThrow, + "true" : TokenType.tTrue, + "try" : TokenType.tTry, + "typedef" : TokenType.tTypedef, + "typeid" : TokenType.tTypeid, + "typeof" : TokenType.tTypeof, + "ubyte" : TokenType.tUbyte, + "ucent" : TokenType.tUcent, + "uint" : TokenType.tUint, + "ulong" : TokenType.tUlong, + "union" : TokenType.tUnion, + "unittest" : TokenType.tUnittest, + "ushort" : TokenType.tUshort, + "version" : TokenType.tVersion, + "void" : TokenType.tVoid, + "volatile" : TokenType.tVolatile, + "wchar" : TokenType.tWchar, + "while" : TokenType.tWhile, + "with" : TokenType.tWith, + "__FILE__" : TokenType.t__FILE__, + "__LINE__" : TokenType.t__LINE__, + "__gshared" : TokenType.t__gshared, + "__thread" : TokenType.t__thread, + "__traits" : TokenType.t__traits, + "@disable" : TokenType.atDisable, + "@property" : TokenType.atProperty, + "@safe" : TokenType.atSafe, + "@system" : TokenType.atSystem, + "@trusted" : TokenType.atTrusted, + ]; +} + +struct Token +{ + TokenType type; + string value; + uint lineNumber; + size_t startIndex; + bool opEquals(ref const(Token) other) const + { + return other.type == type && other.value == value; + } + bool 
opEquals(string range) const { return range == value; } + bool opEquals(TokenType t) const { return type == t; } + int opCmp(size_t i) const + { + if (i > startIndex) return -1; + if (i < startIndex) return 1; + return 0; + } +} diff --git a/main.d b/main.d new file mode 100644 index 0000000..24aef01 --- /dev/null +++ b/main.d @@ -0,0 +1,199 @@ +/******************************************************************************* + * The MIT License + * + * Copyright (c) 2012 Brian Schott (Sir Alaran) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ ******************************************************************************/
+
+import std.file;
+import std.stdio;
+import std.algorithm;
+import std.conv;
+import std.array;
+import std.path;
+import std.regex;
+import std.getopt;
+import std.parallelism;
+import types;
+import tokenizer;
+import parser;
+import langutils;
+import autocomplete;
+import highlighter;
+
+/**
+ * Decides whether a token counts towards the logical-lines-of-code total.
+ * Params:
+ *     t = the type of the token to classify
+ * Returns: true for a semicolon or one of the while, if, for, foreach and case
+ *     keywords, false for every other token type
+ */
+pure bool isLineOfCode(TokenType t)
+{
+    switch(t)
+    {
+    case TokenType.semicolon:
+    case TokenType.tWhile:
+    case TokenType.tIf:
+    case TokenType.tFor:
+    case TokenType.tForeach:
+    case TokenType.tCase:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/**
+ * Loads any import directories specified in /etc/dmd.conf.
+ *
+ * Only lines that start with "DFLAGS" are examined. Every "-I" found on such
+ * a line contributes the text up to the next space character, or up to the
+ * end of the line when the flag is the last thing on it.
+ * Bugs: Only works on Linux. Quoted paths that contain spaces are not
+ *     supported.
+ * Returns: the paths specified as -I options in /etc/dmd.conf
+ */
+string[] loadDefaultImports()
+{
+version(linux)
+{
+    string path = "/etc/dmd.conf";
+    if (!exists(path))
+        return [];
+    string[] rVal;
+    auto file = File(path, "r");
+    foreach(char[] line; file.byLine())
+    {
+        if (!line.startsWith("DFLAGS"))
+            continue;
+        while ((line = line.find("-I")).length > 0)
+        {
+            // Find the end of this flag by hand. The previous code called
+            // std.string.indexOf (a module this file does not import) and
+            // sliced with its result unchecked, so a flag that ended the line
+            // (indexOf == -1) caused a range violation.
+            size_t end = 2;
+            while (end < line.length && line[end] != ' ')
+                ++end;
+            // A bare "-I" carries no directory; do not record an empty path.
+            if (end > 2)
+                rVal ~= line[2 .. end].idup;
+            line = line[end .. $];
+        }
+    }
+    return rVal;
+}
+else
+{
+    return [];
+}
+}
+
+/**
+ * Returns: the absolute path of the given module, or null if it could not be
+ * found.
+ */
+string findAbsPath(string[] dirs, string moduleName)
+{
+    // For file names: anything ending in .d or .di is treated as a path, not
+    // as a dotted module name
+    if (endsWith(moduleName, ".d") || endsWith(moduleName, ".di"))
+    {
+        if (startsWith(moduleName, "/"))
+            return moduleName;
+        else
+            return getcwd() ~ "/" ~ moduleName;
+    }
+
+    // Try to find the file name from a module name like "std.stdio"
+    foreach(dir; dirs)
+    {
+        string fileLocation = dir ~ "/" ~ replace(moduleName, ".", "/");
+        string dfile = fileLocation ~ ".d";
+        // A .d file wins over a .di file found in the same directory
+        if (exists(dfile) && isFile(dfile))
+        {
+            return dfile;
+        }
+        if (exists(fileLocation ~ ".di") && isFile(fileLocation ~ ".di"))
+        {
+            return fileLocation ~ ".di";
+        }
+    }
+    stderr.writeln("Could not locate import ", moduleName, " in ", dirs);
+    return null;
+}
+
+/**
+ * Builds the list of directories that are searched for imported modules.
+ * Returns: one entry per non-blank line of ~/.dscanner (trailing whitespace
+ *     removed), followed by the directories from loadDefaultImports()
+ */
+string[] loadConfig()
+{
+    string path = expandTilde("~/.dscanner");
+    string[] dirs;
+    if (exists(path))
+    {
+        auto f = File(path, "r");
+        scope(exit) f.close();
+
+        auto trimRegex = ctRegex!("\\s*$");
+        foreach(string line; lines(f))
+        {
+            // A blank line used to be stored as an empty directory, which
+            // made findAbsPath probe the filesystem root
+            auto dir = replace(line, trimRegex, "");
+            if (!dir.empty())
+                dirs ~= dir;
+        }
+    }
+    foreach(string importDir; loadDefaultImports()) {
+        dirs ~= importDir;
+    }
+    return dirs;
+}
+
+
+/**
+ * Program entry point. Parses the command line and runs the first requested
+ * mode, checked in this order: --sloc, --highlight, --dotComplete or
+ * --parenComplete, --json. When no mode is requested nothing is printed.
+ * Params:
+ *     args = the command line. getopt removes the options it recognises, so
+ *         afterwards args[1] is the source file and, for the completion
+ *         modes, args[2] is the cursor position
+ */
+void main(string[] args)
+{
+    string[] importDirs;
+    bool sloc;
+    bool dotComplete;
+    bool json;
+    bool parenComplete;
+    bool highlight;
+    getopt(args, "I", &importDirs, "dotComplete", &dotComplete, "sloc", &sloc,
+        "json", &json, "parenComplete", &parenComplete, "highlight", &highlight);
+
+    importDirs ~= loadConfig();
+
+    if (sloc)
+    {
+        // An empty file list is fine here: the count is simply 0
+        writeln(args[1..$].map!(a => a.readText().tokenize())().joiner()
+            .count!(a => isLineOfCode(a.type))());
+        return;
+    }
+
+    // Every remaining mode reads args[1], and completion also reads args[2].
+    // Report a missing argument instead of failing with a range violation.
+    if ((highlight || json || dotComplete || parenComplete) && args.length < 2)
+    {
+        stderr.writeln("No source file specified");
+        return;
+    }
+    if ((dotComplete || parenComplete) && args.length < 3)
+    {
+        stderr.writeln("No cursor position specified");
+        return;
+    }
+
+    if (highlight)
+    {
+        // Fully qualified because the local flag shadows the module function
+        highlighter.highlight(args[1].readText().tokenize(IterationStyle.EVERYTHING));
+        return;
+    }
+
+    if (dotComplete || parenComplete)
+    {
+        auto tokens = args[1].readText().tokenize();
+        auto mod = parseModule(tokens);
+        auto context = new CompletionContext(mod);
+        // NOTE(review): addModule is called from the parallel foreach body;
+        // confirm that CompletionContext.addModule is safe to call
+        // concurrently.
+        foreach (im; parallel(mod.imports))
+        {
+            auto p = findAbsPath(importDirs, im);
+            if (p is null || !p.exists())
+                continue;
+            context.addModule(p.readText().tokenize().parseModule());
+        }
+        auto complete = AutoComplete(tokens, context);
+        if (parenComplete)
+            writeln(complete.parenComplete(to!size_t(args[2])));
+        else if (dotComplete)
+            writeln(complete.dotComplete(to!size_t(args[2])));
+        return;
+    }
+
+    if (json)
+    {
+        auto tokens = tokenize(readText(args[1]));
+        auto mod = parseModule(tokens);
+        mod.writeJSONTo(stdout);
+    }
+}
+
diff --git a/parser.d b/parser.d
new file mode 100644
index 0000000..e9109f0
--- /dev/null
+++ b/parser.d
@@ -0,0 +1,830 @@
+/*******************************************************************************
+ * The MIT License
+ *
+ * Copyright (c) 2012 Brian Schott (Sir Alaran)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+
+module parser;
+
+import std.stream;
+import std.array;
+import std.stdio;
+import std.algorithm;
+
+import types, tokenizer;
+import langutils;
+
+
+/**
+ * Params:
+ *     tokens = the array of tokens
+ *     index = an index into tokens such that tokens[index].type == open. On
+ *         return it indexes the token after the matching close delimiter, or
+ *         equals tokens.length if the delimiters are not balanced
+ *     open = the opening delimiter
+ *     close = the closing delimiter
+ * Returns: all tokens that are between the balanced delimiters that start at
+ *     tokens[index], not including the delimiters. If the delimiters in tokens
+ *     are not balanced, this function will return tokens[index + 1 .. $];
+ */
+const(Token)[] betweenBalanced(const Token[] tokens, ref size_t index, TokenType open,
+    TokenType close)
+in
+{
+    assert (tokens[index] == open);
+}
+body
+{
+    ++index;
+    size_t start = index;
+    int depth = 1;
+    while (depth > 0 && index < tokens.length)
+    {
+        if (tokens[index] == open) ++depth;
+        else if (tokens[index] == close) --depth;
+        ++index;
+    }
+    // Only drop the final token when it really is the matching closer. The
+    // unconditional "index - 1" lost the last token of unbalanced input and
+    // produced the invalid slice [1 .. 0] for a lone opening delimiter.
+    return tokens[start .. (depth == 0 ? index - 1 : index)];
+}
+
+
+/**
+ * See_also: betweenBalanced
+ */
+const(Token)[] betweenBalancedBraces(const Token[] tokens, ref size_t index)
+{
+    return betweenBalanced(tokens, index, TokenType.lBrace, TokenType.rBrace);
+}
+
+
+/**
+ * See_also: betweenBalanced
+ */
+const(Token)[] betweenBalancedParens(const Token[] tokens, ref size_t index)
+{
+    return betweenBalanced(tokens, index, TokenType.lParen, TokenType.rParen);
+}
+
+
+/**
+ * See_also: betweenBalanced
+ */
+const(Token)[] betweenBalancedBrackets(const Token[] tokens, ref size_t index)
+{
+    return betweenBalanced(tokens, index, TokenType.lBracket, TokenType.rBracket);
+}
+
+/**
+ * Moves index across a balanced pair of delimiters. If tokens[index] is Op the
+ * scan runs forward, otherwise it runs backward. The scan stops once the
+ * nesting depth returns to zero, when index reaches tokens.length, or when
+ * index reaches 0.
+ * Params:
+ *     Op = the opening delimiter token type
+ *     Cl = the closing delimiter token type
+ *     tokens = the token array to examine
+ *     index = an index into tokens; updated in place
+ */
+void skipBalanced(alias Op, alias Cl)(const Token[] tokens, ref size_t index)
+{
+    int depth = tokens[index] == Op ? 1 : -1;
+    int deltaIndex = depth;
+    // A backward scan from the first token has nowhere to go. Stepping anyway
+    // would wrap the unsigned index around to size_t.max.
+    if (deltaIndex < 0 && index == 0)
+        return;
+    index += deltaIndex;
+    for (; index < tokens.length && index > 0 && depth != 0; index += deltaIndex)
+    {
+        switch (tokens[index].type)
+        {
+        case Op: ++depth; break;
+        case Cl: --depth; break;
+        default: break;
+        }
+    }
+}
+
+/**
+ * See_also: skipBalanced
+ */
+void skipParens(const Token[] tokens, ref size_t index)
+{
+    skipBalanced!(TokenType.lParen, TokenType.rParen)(tokens, index);
+}
+
+/**
+ * See_also: skipBalanced
+ */
+void skipBrackets(const Token[] tokens, ref size_t index)
+{
+    skipBalanced!(TokenType.lBracket, TokenType.rBracket)(tokens, index);
+}
+
+/**
+ * Params:
+ *     tokens = the token array to examine
+ *     index = an index into tokens such that tokens[index].type == open. On
+ *         return it indexes the token after the matching close delimiter, or
+ *         equals tokens.length if the delimiters are not balanced
+ *     open = the opening delimiter
+ *     close = the closing delimiter
+ * Returns: a string representing the contents of the two delimiters, not
+ *     including the outermost pair. Token values are concatenated without
+ *     the original whitespace, except that every comma is written as a comma
+ *     followed by a single space. Nested delimiters are kept.
+ */
+string content(const Token[] tokens, ref size_t index, TokenType open, TokenType close)
+in
+{
+    assert (tokens[index] == open);
+}
+body
+{
+    index++;
+    auto app = appender!string();
+    int depth = 1;
+    while (depth > 0 && index < tokens.length)
+    {
+        if (tokens[index] == open)
+        {
+            // Nested delimiters are part of the content. They used to be
+            // dropped, turning "is(T == int)" into "isT==int". This relies on
+            // delimiter tokens carrying their text in value, as
+            // parseTypeDeclaration already assumes for operator tokens.
+            ++depth;
+            app.put(tokens[index].value);
+        }
+        else if (tokens[index] == close)
+        {
+            --depth;
+            // The outermost closer is added by the caller, not here
+            if (depth > 0)
+                app.put(tokens[index].value);
+        }
+        else if (tokens[index] == TokenType.comma)
+        {
+            app.put(", ");
+        }
+        else
+            app.put(tokens[index].value);
+        ++index;
+    }
+    return app.data;
+}
+
+
+/**
+ * See_also: content
+ */
+string parenContent(const Token[]tokens, ref size_t index)
+{
+    return "(" ~ content(tokens, index, TokenType.lParen, TokenType.rParen) ~ ")";
+}
+
+
+/**
+ * See_also: content
+ */
+string bracketContent(const Token[]tokens, ref size_t index)
+{
+    return "[" ~ content(tokens, index, TokenType.lBracket, TokenType.rBracket) ~ "]";
+}
+
+
+/**
+ * Advances index until it indexes a token in tokens after a right brace if
+ * index initially indexed a left brace, or advances index until it indexes a
+ * token after a semicolon otherwise.
+ */
+void skipBlockStatement(const Token[] tokens, ref size_t index)
+{
+    // Callers advance index before calling, so it may already be at the end
+    if (index >= tokens.length)
+        return;
+    if (tokens[index] == TokenType.lBrace)
+        betweenBalancedBraces(tokens, index);
+    else
+    {
+        skipPastNext(tokens, TokenType.semicolon, index);
+    }
+}
+
+
+/**
+ * Advances index until it indexes a token in tokens directly after a token
+ * of type type. This function handles nesting of braces, brackets, and
+ * parentheses: a bracketed group is skipped as a whole, so a token of the
+ * requested type inside such a group is not a match. If no match is found,
+ * index ends up equal to tokens.length.
+ */
+void skipPastNext(const Token[] tokens, TokenType type, ref size_t index)
+{
+    while (index < tokens.length)
+    {
+        if (tokens[index].type == TokenType.lBrace)
+            betweenBalancedBraces(tokens, index);
+        else if (tokens[index].type == TokenType.lParen)
+            betweenBalancedParens(tokens, index);
+        else if (tokens[index].type == TokenType.lBracket)
+            betweenBalancedBrackets(tokens, index);
+        else if (tokens[index].type == type)
+        {
+            ++index;
+            return;
+        }
+        else
+            ++index;
+    }
+}
+
+/**
+ * Reads a type starting at tokens[index]. The first token is taken as is.
+ * After it, any number of these suffixes is appended: a bracketed group, a
+ * template instantiation ("!" followed by a parenthesized list or by a single
+ * token), "*" and "&".
+ * Params:
+ *     tokens = the token array to examine
+ *     index = an index into tokens at the first token of the type. On return
+ *         it indexes the first token that is not part of the type
+ * Returns: the text of the type
+ */
+string parseTypeDeclaration(const Token[] tokens, ref size_t index)
+{
+    auto type = tokens[index++].value.idup;
+    buildingType: while (index < tokens.length)
+    {
+        switch (tokens[index].type)
+        {
+        case TokenType.lBracket:
+            type ~= bracketContent(tokens, index);
+            break;
+        case TokenType.not:
+            type ~= tokens[index++].value;
+            // A trailing "!" has no argument to read
+            if (index >= tokens.length)
+                break buildingType;
+            if (tokens[index] == TokenType.lParen)
+                type ~= parenContent(tokens, index);
+            else
+                type ~= tokens[index++].value;
+            break;
+        case TokenType.star:
+        case TokenType.bitAnd:
+            type ~= tokens[index++].value;
+            break;
+        default:
+            break buildingType;
+        }
+    }
+    return type;
+}
+
+/**
+ * Parses a module from a token array.
+ * Declarations the parser does not model (alias, mixin, assert, else blocks,
+ * template bodies, unittest blocks) are skipped.
+ * Params:
+ *     tokens = the tokens of the module, or of a nested declaration block
+ *     protection = the default protection level for a block statement
+ *     attributes = the default attributes for a block statement
+ * Returns: the parsed module
+ */
+Module parseModule(const Token[] tokens, string protection = "public", string[] attributes = [])
+{
+    // State collected for the declaration that is currently being read
+    string type;
+    string name;
+    string localProtection = "";
+    string[] localAttributes = [];
+
+    void resetLocals()
+    {
+        type = "";
+        name = "";
+        localProtection = "";
+        localAttributes = [];
+    }
+
+    Module mod = new Module;
+    size_t index = 0;
+
+    // True when index is in range and the current token has type t. Used
+    // wherever index has just been advanced, so that truncated input ends the
+    // parse instead of causing a range violation.
+    bool at(TokenType t)
+    {
+        return index < tokens.length && tokens[index] == t;
+    }
+
+    while(index < tokens.length)
+    {
+        switch(tokens[index].type)
+        {
+        case TokenType.tElse:
+        case TokenType.tMixin:
+        case TokenType.tAssert:
+            ++index;
+            tokens.skipBlockStatement(index);
+            // Attributes seen before a skipped statement (the "static" of a
+            // "static assert") must not leak into the next declaration
+            resetLocals();
+            break;
+        case TokenType.tAlias:
+            tokens.skipBlockStatement(index);
+            resetLocals();
+            break;
+        case TokenType.tImport:
+            mod.imports ~= parseImports(tokens, index);
+            resetLocals();
+            break;
+        case TokenType.tVersion:
+            ++index;
+            if (at(TokenType.lParen))
+            {
+                tokens.betweenBalancedParens(index);
+                if (at(TokenType.lBrace))
+                    mod.merge(parseModule(betweenBalancedBraces(tokens, index),
+                        localProtection.empty() ? protection : localProtection,
+                        attributes));
+            }
+            else if (at(TokenType.assign))
+                tokens.skipBlockStatement(index);
+            break;
+        case TokenType.atDisable:
+        case TokenType.atProperty:
+        case TokenType.atSafe:
+        case TokenType.atSystem:
+        case TokenType.atTrusted:
+        case TokenType.tAbstract:
+        case TokenType.tConst:
+        case TokenType.tDeprecated:
+        case TokenType.tExtern:
+        case TokenType.tFinal:
+        case TokenType.t__gshared:
+        case TokenType.tImmutable:
+        case TokenType.tInout:
+        case TokenType.tNothrow:
+        case TokenType.tOverride:
+        case TokenType.tPure:
+        case TokenType.tScope:
+        case TokenType.tShared:
+        case TokenType.tStatic:
+        case TokenType.tSynchronized:
+            auto tmp = tokens[index++].value;
+            if (at(TokenType.lParen))
+                // Type constructor form, e.g. const(int)
+                type = tmp ~ parenContent(tokens, index);
+            else if (at(TokenType.colon))
+            {
+                // "attr:" applies to the rest of the enclosing block
+                index++;
+                attributes ~= tmp;
+            }
+            else
+                localAttributes ~= tmp;
+            break;
+        case TokenType.tAlign:
+            string attribute = tokens[index++].value;
+            if (at(TokenType.lParen))
+                attribute ~= parenContent(tokens, index);
+            if (at(TokenType.lBrace))
+                mod.merge(parseModule(betweenBalancedBraces(tokens, index),
+                    localProtection.empty() ? protection : localProtection,
+                    attributes ~ attribute));
+            else if (at(TokenType.colon))
+            {
+                ++index;
+                attributes ~= attribute;
+            }
+            else
+                localAttributes ~= attribute;
+            break;
+        case TokenType.PROTECTION_BEGIN: .. case TokenType.PROTECTION_END:
+            string p = tokens[index++].value;
+            if (at(TokenType.colon))
+            {
+                protection = p;
+                ++index;
+            }
+            else if (at(TokenType.lBrace))
+                mod.merge(parseModule(betweenBalancedBraces(tokens, index),
+                    p, attributes ~ localAttributes));
+            else
+                localProtection = p;
+            break;
+        case TokenType.tModule:
+            ++index;
+            while (index < tokens.length && tokens[index] != TokenType.semicolon)
+                mod.name ~= tokens[index++].value;
+            ++index;
+            resetLocals();
+            break;
+        case TokenType.tUnion:
+            mod.unions ~= parseUnion(tokens, index,
+                localProtection.empty() ? protection : localProtection,
+                localAttributes ~ attributes);
+            resetLocals();
+            break;
+        case TokenType.tClass:
+            mod.classes ~= parseClass(tokens, index,
+                localProtection.empty() ? protection : localProtection,
+                localAttributes ~ attributes);
+            resetLocals();
+            break;
+        case TokenType.tInterface:
+            mod.interfaces ~= parseInterface(tokens, index,
+                localProtection.empty() ? protection : localProtection,
+                localAttributes ~ attributes);
+            resetLocals();
+            break;
+        case TokenType.tStruct:
+            mod.structs ~= parseStruct(tokens, index,
+                localProtection.empty() ? protection : localProtection,
+                localAttributes ~ attributes);
+            resetLocals();
+            break;
+        case TokenType.tEnum:
+            mod.enums ~= parseEnum(tokens, index,
+                localProtection.empty() ? protection : localProtection,
+                localAttributes ~ attributes);
+            resetLocals();
+            break;
+        case TokenType.tTemplate:
+            ++index; // template
+            ++index; // name
+            if (at(TokenType.lParen))
+                tokens.betweenBalancedParens(index); // params
+            if (at(TokenType.lBrace))
+                tokens.betweenBalancedBraces(index); // body
+            resetLocals();
+            break;
+        case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END:
+        case TokenType.tAuto:
+        case TokenType.identifier:
+            if (type.empty())
+            {
+                // First identifier-like token of a declaration: its type
+                type = tokens.parseTypeDeclaration(index);
+            }
+            else
+            {
+                // Second one: the declared name. A following "(" makes it a
+                // function, anything else a variable.
+                name = tokens[index++].value;
+                if (index >= tokens.length) break;
+                if (tokens[index] == TokenType.lParen)
+                {
+                    mod.functions ~= parseFunction(tokens, index, type, name,
+                        tokens[index].lineNumber,
+                        localProtection.empty() ? protection : localProtection,
+                        attributes ~ localAttributes);
+                }
+                else
+                {
+                    Variable v = new Variable;
+                    v.name = name;
+                    v.type = type;
+                    v.attributes = localAttributes ~ attributes;
+                    v.protection = localProtection.empty() ? protection : localProtection;
+                    v.line = tokens[index].lineNumber;
+                    mod.variables ~= v;
+                    // Skip the rest of the statement. Otherwise identifiers in
+                    // an initializer ("int x = foo(y);") were read as the type
+                    // and name of further declarations.
+                    tokens.skipPastNext(TokenType.semicolon, index);
+                }
+                resetLocals();
+            }
+            break;
+        case TokenType.tUnittest:
+            ++index;
+            // This used to test tokens.empty(), which says nothing about
+            // whether index is still in range
+            if (at(TokenType.lBrace))
+                tokens.skipBlockStatement(index);
+            resetLocals();
+            break;
+        case TokenType.tilde:
+            ++index;
+            if (at(TokenType.tThis))
+            {
+                // Destructor: handled by the constructor case below
+                name = "~";
+                goto case;
+            }
+            break;
+        case TokenType.tThis:
+            name ~= tokens[index++].value;
+            if (at(TokenType.lParen))
+            {
+                mod.functions ~= parseFunction(tokens, index, "", name,
+                    tokens[index - 1].lineNumber,
+                    localProtection.empty() ? protection : localProtection,
+                    localAttributes ~ attributes);
+            }
+            resetLocals();
+            break;
+        default:
+            ++index;
+            break;
+        }
+    }
+    return mod;
+}
+
+
+/**
+ * Parses an import statement
+ * Returns: only the module names that were imported, not which symbols were
+ * selectively imported.
+ */
+string[] parseImports(const Token[] tokens, ref size_t index)
+{
+    assert(tokens[index] == TokenType.tImport);
+    ++index;
+    auto app = appender!(string[])();
+    string im;
+    while (index < tokens.length)
+    {
+        switch(tokens[index].type)
+        {
+        case TokenType.comma:
+            ++index;
+            app.put(im);
+            im = "";
+            break;
+        case TokenType.assign:
+            // Renamed import ("import io = std.stdio;"). What has been read
+            // so far is the local alias, not a module name. Discard it and
+            // keep reading. The old code returned the alias and left index on
+            // the module name, which the caller then parsed as a declaration.
+            im = "";
+            ++index;
+            break;
+        case TokenType.semicolon:
+            app.put(im);
+            ++index;
+            return app.data;
+        case TokenType.colon:
+            // Selective import: keep the module, skip the symbol list
+            app.put(im);
+            tokens.skipBlockStatement(index);
+            return app.data;
+        default:
+            im ~= tokens[index++].value;
+            break;
+        }
+    }
+    // Input ended before the semicolon: keep what was read
+    if (!im.empty())
+        app.put(im);
+    return app.data;
+}
+
+
+/**
+ * Parses an enum declaration
+ * Params:
+ *     tokens = the token array to examine
+ *     index = an index into tokens at the enum keyword. On return it indexes
+ *         the token after the declaration
+ *     protection = not used by this function
+ *     attributes = not used by this function
+ * Returns: the enum with its line, name (empty for an anonymous enum), base
+ *     type and members. The base type is "int" when none is written.
+ */
+Enum parseEnum(const Token[] tokens, ref size_t index, string protection,
+    string[] attributes)
+in
+{
+    assert (tokens[index] == TokenType.tEnum);
+}
+body
+{
+    // NOTE(review): protection and attributes are accepted but never stored.
+    // Confirm whether Enum has fields for them.
+    ++index;
+    Enum e = new Enum;
+    if (index >= tokens.length)
+        return e;
+    e.line = tokens[index].lineNumber;
+    // "enum { ... }" and "enum : T { ... }" have no name. Consuming the next
+    // token unconditionally used to turn "{" or ":" into the name.
+    if (tokens[index] != TokenType.lBrace && tokens[index] != TokenType.colon)
+        e.name = tokens[index++].value;
+
+    if (index < tokens.length && tokens[index] == TokenType.colon)
+    {
+        ++index;
+        if (index < tokens.length)
+            e.type = tokens[index++].value;
+    }
+    else
+        // D enums without an explicit base type default to int, not uint
+        e.type = "int";
+
+    if (index >= tokens.length)
+        return e;
+
+    if (tokens[index] != TokenType.lBrace)
+    {
+        // Manifest constant or forward declaration: nothing more to record
+        tokens.skipBlockStatement(index);
+        return e;
+    }
+
+    auto r = betweenBalancedBraces(tokens, index);
+    for (size_t i = 0; i < r.length;)
+    {
+        if (r[i].type == TokenType.identifier)
+        {
+            EnumMember member;
+            member.line = r[i].lineNumber;
+            member.name = r[i].value;
+            e.members ~= member;
+            // Skip an optional initializer up to the next member
+            r.skipPastNext(TokenType.comma, i);
+        }
+        else
+            ++i;
+    }
+    return e;
+}
+
+
+/**
+ * Parses a function declaration
+ * Params:
+ *     tokens = the token array to examine
+ *     index = an index into tokens at the "(" that follows the function name.
+ *         On return it indexes the token after the function body or after
+ *         the terminating semicolon
+ *     type = the return type that was already read by the caller
+ *     name = the function name that was already read by the caller
+ *     line = the line number to record for the function
+ *     protection = the protection level to record for the function
+ *     attributes = attributes that were read before the function name
+ * Returns: the function. If two parameter lists are present, the first one is
+ *     stored as the template parameters.
+ */
+Function parseFunction(const Token[] tokens, ref size_t index, string type,
+    string name, uint line, string protection, string[] attributes)
+in
+{
+    assert (tokens[index] == TokenType.lParen);
+}
+body
+{
+    Function f = new Function;
+    f.name = name;
+    f.returnType = type;
+    f.line = line;
+    // The protection argument used to be ignored, so every function was
+    // reported with an empty protection level.
+    // NOTE(review): assumes Function has a protection member like Variable
+    // and Struct do; confirm against types.d.
+    f.protection = protection;
+    f.attributes.insertInPlace(f.attributes.length, attributes);
+
+    Variable[] vars1 = parseParameters(tokens, index);
+    if (index < tokens.length && tokens[index] == TokenType.lParen)
+    {
+        // Two lists: the first one held the template parameters
+        f.templateParameters.insertInPlace(f.templateParameters.length,
+            map!("a.type")(vars1));
+        f.parameters.insertInPlace(f.parameters.length,
+            parseParameters(tokens, index));
+    }
+    else
+        f.parameters.insertInPlace(f.parameters.length, vars1);
+
+    // Attributes written after the parameter list
+    attributeLoop: while(index < tokens.length)
+    {
+        switch (tokens[index].type)
+        {
+        case TokenType.tImmutable:
+        case TokenType.tConst:
+        case TokenType.tPure:
+        case TokenType.atTrusted:
+        case TokenType.atProperty:
+        case TokenType.tNothrow:
+        case TokenType.tFinal:
+        case TokenType.tOverride:
+            f.attributes ~= tokens[index++].value;
+            break;
+        default:
+            break attributeLoop;
+        }
+    }
+
+    if (index < tokens.length && tokens[index] == TokenType.tIf)
+        f.constraint = parseConstraint(tokens, index);
+    // Contract blocks and the body keyword
+    while (index < tokens.length &&
+        (tokens[index] == TokenType.tIn || tokens[index] == TokenType.tOut
+        || tokens[index] == TokenType.tBody))
+    {
+        ++index;
+        // "out (result)" names the return value before its block. Without
+        // this the parenthesis stopped the loop and the contract and body
+        // were handed back to the caller unparsed.
+        if (index < tokens.length && tokens[index] == TokenType.lParen)
+            tokens.betweenBalancedParens(index);
+        if (index < tokens.length && tokens[index] == TokenType.lBrace)
+            tokens.skipBlockStatement(index);
+    }
+    if (index >= tokens.length)
+        return f;
+    if (tokens[index] == TokenType.lBrace)
+        tokens.skipBlockStatement(index);
+    else if (tokens[index] == TokenType.semicolon)
+        ++index;
+    return f;
+}
+
+/**
+ * Parses a template constraint.
+ * Params:
+ *     tokens = the token array to examine
+ *     index = an index into tokens at the if keyword. On return it indexes
+ *         the token after the closing parenthesis
+ * Returns: the text "if " followed by the parenthesized condition
+ */
+string parseConstraint(const Token[] tokens, ref size_t index)
+{
+    assert(tokens[index] == TokenType.tIf);
+    ++index;
+    assert(index < tokens.length && tokens[index] == TokenType.lParen);
+    return "if " ~ parenContent(tokens, index);
+}
+
+/**
+ * Parses a parenthesized parameter list.
+ * Params:
+ *     tokens = the token array to examine
+ *     index = an index into tokens at the opening parenthesis. On return it
+ *         indexes the token after the closing parenthesis
+ * Returns: one Variable per parameter. A parameter that consists of a single
+ *     type has an empty name.
+ */
+Variable[] parseParameters(const Token[] tokens, ref size_t index)
+in
+{
+    assert (tokens[index] == TokenType.lParen);
+}
+body
+{
+    auto params = appender!(Variable[])();
+    Variable v = new Variable;
+    auto r = betweenBalancedParens(tokens, index);
+    size_t i = 0;
+    while (i < r.length)
+    {
+        switch(r[i].type)
+        {
+        case TokenType.tIn:
+        case TokenType.tOut:
+        case TokenType.tRef:
+        case TokenType.tScope:
+        case TokenType.tLazy:
+        case TokenType.tConst:
+        case TokenType.tImmutable:
+        case TokenType.tShared:
+        case TokenType.tInout:
+            auto tmp = r[i++].value;
+            // The qualifier may be the last token of the list
+            if (i < r.length && r[i] == TokenType.lParen)
+                v.type ~= tmp ~ parenContent(r, i);
+            else
+                v.attributes ~= tmp;
+            break;
+        case TokenType.colon:
+            // Specialization ("T : int"): skip it, keep the parameter
+            i++;
+            r.skipPastNext(TokenType.comma, i);
+            params.put(v);
+            v = new Variable;
+            break;
+        case TokenType.comma:
+            ++i;
+            params.put(v);
+            v = new Variable;
+            break;
+        default:
+            if (v.type.empty())
+            {
+                v.type = r.parseTypeDeclaration(i);
+                // A type with nothing after it is a complete parameter
+                if (i >= r.length)
+                    params.put(v);
+            }
+            else
+            {
+                v.line = r[i].lineNumber;
+                v.name = r[i++].value;
+                params.put(v);
+                // v is a class reference, so this still updates the stored
+                // parameter
+                if (i < r.length && r[i] == TokenType.vararg)
+                {
+                    v.type ~= " ...";
+                }
+                v = new Variable;
+                // Skip a default value up to the next parameter
+                r.skipPastNext(TokenType.comma, i);
+            }
+            break;
+        }
+    }
+    return params.data;
+}
+
+/**
+ * Parses the list of base classes and interfaces of a class or interface.
+ * Params:
+ *     tokens = the token array to examine
+ *     index = an index into tokens at the colon. On return it indexes the
+ *         first token after the list
+ * Returns: the base types in source order
+ */
+string[] parseBaseClassList(const Token[] tokens, ref size_t index)
+in
+{
+    assert(tokens[index] == TokenType.colon);
+}
+body
+{
+    auto bases = appender!(string[])();
+    ++index;
+    while (index < tokens.length)
+    {
+        if (tokens[index] == TokenType.identifier)
+        {
+            string base = parseTypeDeclaration(tokens, index);
+            bases.put(base);
+            if (index < tokens.length && tokens[index] == TokenType.comma)
+                ++index;
+            else
+                break;
+        }
+        else
+            break;
+    }
+    return bases.data;
+}
+
+/**
+ * Parses the brace-delimited body of an aggregate and stores its functions
+ * and variables in st, together with the start offsets of the opening and
+ * closing brace tokens.
+ * Params:
+ *     tokens = the token array to examine
+ *     index = an index into tokens at the opening brace. On return it indexes
+ *         the token after the body
+ *     st = the aggregate to fill in
+ */
+void parseStructBody(const Token[] tokens, ref size_t index, Struct st)
+{
+    st.bodyStart = tokens[index].startIndex;
+    Module m = parseModule(betweenBalancedBraces(tokens, index));
+    st.bodyEnd = tokens[index - 1].startIndex;
+    st.functions.insertInPlace(0, m.functions);
+    st.variables.insertInPlace(0, m.variables);
+}
+
+
+/**
+ * Parses a struct or union declaration.
+ * Params:
+ *     tokens = the token array to examine
+ *     index = an index into tokens at the name of the aggregate. On return it
+ *         indexes the token after the declaration
+ *     protection = the protection level to record
+ *     attributes = the attributes to record
+ * Returns: the parsed aggregate
+ */
+Struct parseStructOrUnion(const Token[] tokens, ref size_t index, string protection,
+    string[] attributes)
+{
+    Struct s = new Struct;
+    s.attributes = attributes;
+    s.protection = protection;
+    // The keyword may have been the last token
+    if (index >= tokens.length) return s;
+    s.line = tokens[index].lineNumber;
+    s.name = tokens[index++].value;
+
+    // This range check used to come after the read it was meant to guard
+    if (index >= tokens.length) return s;
+
+    if (tokens[index] == TokenType.lParen)
+        s.templateParameters.insertInPlace(s.templateParameters.length,
+            map!("a.type")(parseParameters(tokens, index)));
+
+    if (index >= tokens.length) return s;
+
+    if (tokens[index] == TokenType.tIf)
+        s.constraint = parseConstraint(tokens, index);
+
+    if (index >= tokens.length) return s;
+
+    if (tokens[index] == TokenType.lBrace)
+        parseStructBody(tokens, index, s);
+    else
+        tokens.skipBlockStatement(index);
+    return s;
+}
+
+/**
+ * See_also: parseStructOrUnion
+ */
+Struct parseStruct(const Token[] tokens, ref size_t index, string protection,
+    string[] attributes)
+in
+{
+    assert(tokens[index] == TokenType.tStruct);
+}
+body
+{
+    return parseStructOrUnion(tokens, ++index, protection, attributes);
+}
+
+/**
+ * See_also: parseStructOrUnion
+ */
+Struct parseUnion(const Token[] tokens, ref size_t index, string protection,
+    string[] attributes)
+in
+{
+    assert(tokens[index] == TokenType.tUnion);
+}
+body
+{
+    return parseStructOrUnion(tokens, ++index, protection, attributes);
+}
+
+/**
+ * Parses a class or interface declaration.
+ * Params:
+ *     tokens = the token array to examine
+ *     index = an index into tokens at the name of the class or interface. On
+ *         return it indexes the token after the declaration
+ *     protection = the protection level to record
+ *     attributes = the attributes to record
+ * Returns: the parsed class or interface
+ */
+Inherits parseInherits(const Token[] tokens, ref size_t index, string protection,
+    string[] attributes)
+{
+    auto i = new Inherits;
+    i.protection = protection;
+    i.attributes.insertInPlace(i.attributes.length, attributes);
+    // The keyword may have been the last token
+    if (index >= tokens.length) return i;
+    i.line = tokens[index].lineNumber;
+    i.name = tokens[index++].value;
+
+    // This range check used to come after the read it was meant to guard
+    if (index >= tokens.length) return i;
+
+    if (tokens[index] == TokenType.lParen)
+        i.templateParameters.insertInPlace(i.templateParameters.length,
+            map!("a.type")(parseParameters(tokens, index)));
+
+    if (index >= tokens.length) return i;
+
+    if (tokens[index] == TokenType.tIf)
+        i.constraint = parseConstraint(tokens, index);
+
+    if (index >= tokens.length) return i;
+
+    if (tokens[index] == TokenType.colon)
+        i.baseClasses = parseBaseClassList(tokens, index);
+
+    if (index >= tokens.length) return i;
+
+    if (tokens[index] == TokenType.lBrace)
+        parseStructBody(tokens, index, i);
+    else
+        tokens.skipBlockStatement(index);
+    return i;
+}
+
+/**
+ * See_also: parseInherits
+ */
+Inherits parseInterface(const Token[] tokens, ref size_t index, string protection,
+    string[] attributes)
+in
+{
+    assert (tokens[index] == TokenType.tInterface);
+}
+body
+{
+    return parseInherits(tokens, ++index, protection, attributes);
+}
+
+
+/**
+ * See_also: parseInherits
+ */
+Inherits parseClass(const Token[] tokens, ref size_t index, string protection,
+    string[] attributes)
+in
+{
+    assert(tokens[index] == TokenType.tClass);
+}
+body
+{
+    return parseInherits(tokens, ++index, protection, attributes);
+}
diff --git a/tokenizer.d b/tokenizer.d
new file mode 100644
index 0000000..4049b61
--- /dev/null
+++ b/tokenizer.d
@@ -0,0 +1,552 @@
+/*******************************************************************************
+ * The MIT License
+ *
+ * Copyright (c) 2012 Brian Schott (Sir Alaran / Hackerpilot)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+
+import std.range;
+import std.file;
+import std.traits;
+import std.algorithm;
+import std.conv;
+import std.uni;
+import std.stdio;
+
+import langutils;
+import codegen;
+
+
+/**
+ * Increments endIndex until it indexes a non-whitespace character in
+ * inputString.
+ * Params: + * inputString = the source code to examine + * endIndex = an index into inputString + * lineNumber = the line number that corresponds to endIndex + * style = the code iteration style + * Returns: The whitespace, or null if style was CODE_ONLY + */ +pure nothrow string lexWhitespace(S)(S inputString, ref size_t endIndex, + ref uint lineNumber, IterationStyle style = IterationStyle.CODE_ONLY) + if (isSomeString!S) +{ + immutable startIndex = endIndex; + while (endIndex < inputString.length && isWhite(inputString[endIndex])) + { + if (inputString[endIndex] == '\n') + lineNumber++; + ++endIndex; + } + final switch (style) + { + case IterationStyle.EVERYTHING: + return inputString[startIndex .. endIndex]; + case IterationStyle.CODE_ONLY: + return null; + } +} + + +/** + * Increments endIndex until it indexes a character directly after a comment + * Params: + * inputString = the source code to examine + * endIndex = an index into inputString at the second character of a + * comment, i.e. points at the second slash in a // comment. 
+ * lineNumber = the line number that corresponds to endIndex + * Returns: The comment + */ +pure nothrow string lexComment(S)(ref S inputString, ref size_t endIndex, + ref uint lineNumber) if (isSomeString!S) +{ + if (inputString.length == 0) + return ""; + auto startIndex = endIndex - 1; + switch(inputString[endIndex]) + { + case '/': + while (endIndex < inputString.length && inputString[endIndex] != '\n') + { + if (inputString[endIndex] == '\n') + ++lineNumber; + ++endIndex; + } + break; + case '*': + while (endIndex < inputString.length + && !inputString[endIndex..$].startsWith("*/")) + { + if (inputString[endIndex] == '\n') + ++lineNumber; + ++endIndex; + } + endIndex += 2; + break; + case '+': + ++endIndex; + int depth = 1; + while (depth > 0 && endIndex + 1 < inputString.length) + { + if (inputString[endIndex] == '\n') + lineNumber++; + else if (inputString[endIndex..$].startsWith("+/")) + depth--; + else if (inputString[endIndex..$].startsWith("/+")) + depth++; + ++endIndex; + } + ++endIndex; + break; + default: + break; + } + return inputString[startIndex..endIndex]; +} + + +/** + * Params: + * inputString = the source code to examine + * endIndex = an index into inputString at the opening quote + * lineNumber = the line number that corresponds to endIndex + * quote = the opening (and closing) quote character for the string to be + * lexed + * Returns: a string literal, including its opening and closing quote characters + * Bugs: Does not handle string suffixes + */ +pure nothrow string lexString(S, C)(S inputString, ref size_t endIndex, ref uint lineNumber, + C quote, bool canEscape = true) if (isSomeString!S && isSomeChar!C) +in +{ + assert (inputString[endIndex] == quote); + assert (quote == '\'' || quote == '\"' || quote == '`'); +} +body +{ + if (inputString[endIndex] != quote) + return ""; + auto startIndex = endIndex; + ++endIndex; + bool escape = false; + while (endIndex < inputString.length && (inputString[endIndex] != quote || escape)) + { + if 
(escape) + escape = false; + else + escape = (canEscape && inputString[endIndex] == '\\'); + if (inputString[endIndex] == '\n') + lineNumber++; + ++endIndex; + } + ++endIndex; + endIndex = min(endIndex, inputString.length); + return inputString[startIndex .. endIndex]; +} + + +/** + * Lexes the various crazy D string literals such as q{}, q"WTF is this? WTF", + * and q"<>". + * Params: + * inputString = the source code to examine + * endIndex = an index into inputString at the opening quote + * lineNumber = the line number that corresponds to endIndex + * Returns: a string literal, including its opening and closing quote characters + */ +string lexDelimitedString(S)(ref S inputString, ref size_t endIndex, + ref uint lineNumber) if (isSomeString!S) +{ + auto startIndex = endIndex; + ++endIndex; + string open = to!string(inputString[endIndex]); + string close; + bool nesting = false; + switch (open) + { + case "[": close = "]"; ++endIndex; nesting = true; break; + case "<": close = ">"; ++endIndex; nesting = true; break; + case "{": close = "}"; ++endIndex; nesting = true; break; + case "(": close = ")"; ++endIndex; nesting = true; break; + default: + while(!isWhite(inputString[endIndex])) endIndex++; + close = open = inputString[startIndex + 1 .. endIndex]; + break; + } + int depth = 1; + while (endIndex < inputString.length && depth > 0) + { + if (inputString[endIndex] == '\n') + { + lineNumber++; + endIndex++; + } + else if (inputString[endIndex..$].startsWith(open)) + { + endIndex += open.length; + if (!nesting) + { + if (inputString[endIndex] == '\"') + ++endIndex; + break; + } + depth++; + } + else if (inputString[endIndex..$].startsWith(close)) + { + endIndex += close.length; + depth--; + if (depth <= 0) + break; + } + else + ++endIndex; + } + if (endIndex < inputString.length && inputString[endIndex] == '\"') + ++endIndex; + return inputString[startIndex .. 
endIndex]; +} + + +string lexTokenString(S)(ref S inputString, ref size_t endIndex, ref uint lineNumber) +{ + /+auto r = byDToken(range, IterationStyle.EVERYTHING); + string s = getBraceContent(r); + range.popFrontN(s.length); + return s;+/ + return ""; +} + +/** + * + */ +pure nothrow string lexNumber(S)(ref S inputString, ref size_t endIndex) if (isSomeString!S) +{ + auto startIndex = endIndex; + bool foundDot = false; + bool foundX = false; + bool foundB = false; + bool foundE = false; + numberLoop: while (endIndex < inputString.length) + { + switch (inputString[endIndex]) + { + case '0': + if (!foundX) + { + ++endIndex; + if (endIndex < inputString.length + && (inputString[endIndex] == 'x' || inputString[endIndex] == 'X')) + { + ++endIndex; + foundX = true; + } + } + else + ++endIndex; + break; + case 'b': + if (foundB) + break numberLoop; + foundB = true; + ++endIndex; + break; + case '.': + if (foundDot || foundX || foundE) + break numberLoop; + foundDot = true; + ++endIndex; + break; + case '+': + case '-': + if (!foundE) + break numberLoop; + ++endIndex; + break; + case 'p': + case 'P': + if (!foundX) + break numberLoop; + foundE = true; + goto case '_'; + case 'e': + case 'E': + if (foundE || foundX) + break numberLoop; + foundE = true; + goto case '_'; + case '1': .. case '9': + case '_': + ++endIndex; + break; + case 'F': + case 'f': + case 'L': + case 'i': + ++endIndex; + break numberLoop; + default: + break numberLoop; + } + } + return inputString[startIndex .. endIndex]; +} + + +/** + * Returns: true if ch marks the ending of one token and the beginning of + * another, false otherwise + */ +pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C) +{ + switch (ch) + { + case '!': .. case '/': + case ':': .. case '@': + case '[': .. case '^': + case '{': .. case '~': + case 0x20: // space + case 0x09: // tab + case 0x0a: .. 
case 0x0d: // newline, vertical tab, form feed, carriage return + return true; + default: + return false; + } +} + +/** + * Configure the tokenize() function + */ +enum IterationStyle +{ + /// Only include code, not whitespace or comments + CODE_ONLY, + /// Include everything + EVERYTHING +} + +Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyle.CODE_ONLY) + if (isSomeString!S) +{ + auto tokenAppender = appender!(Token[])(); + + // This is very likely a local maximum, but it does seem to take a few + // milliseconds off of the run time + tokenAppender.reserve(inputString.length / 4); + + size_t endIndex = 0; + uint lineNumber = 1; + while (endIndex < inputString.length) + { + Token currentToken; + auto startIndex = endIndex; + if (isWhite(inputString[endIndex])) + { + if (iterationStyle == IterationStyle.EVERYTHING) + { + currentToken.lineNumber = lineNumber; + currentToken.value = lexWhitespace(inputString, endIndex, + lineNumber, IterationStyle.EVERYTHING); + currentToken.type = TokenType.whitespace; + tokenAppender.put(currentToken); + } + else + lexWhitespace(inputString, endIndex, lineNumber); + continue; + } + currentToken.startIndex = endIndex; + + outerSwitch: switch(inputString[endIndex]) + { + mixin(generateCaseTrie( + "=", "TokenType.assign", + "&", "TokenType.bitAnd", + "&=", "TokenType.bitAndEquals", + "|", "TokenType.bitOr", + "|=", "TokenType.bitOrEquals", + "~=", "TokenType.catEquals", + ":", "TokenType.colon", + ",", "TokenType.comma", + "$", "TokenType.dollar", + ".", "TokenType.dot", + "==", "TokenType.equals", + "=>", "TokenType.goesTo", + ">", "TokenType.greater", + ">=", "TokenType.greaterEqual", + "#", "TokenType.hash", + "&&", "TokenType.lAnd", + "{", "TokenType.lBrace", + "[", "TokenType.lBracket", + "<", "TokenType.less", + "<=", "TokenType.lessEqual", + "<>=", "TokenType.lessEqualGreater", + "<>", "TokenType.lessOrGreater", + "||", "TokenType.lOr", + "(", "TokenType.lParen", + "-", "TokenType.minus", + "-=", 
"TokenType.minusEquals", + "%", "TokenType.mod", + "%=", "TokenType.modEquals", + "*=", "TokenType.mulEquals", + "!", "TokenType.not", + "!=", "TokenType.notEquals", + "!>", "TokenType.notGreater", + "!>=", "TokenType.notGreaterEqual", + "!<", "TokenType.notLess", + "!<=", "TokenType.notLessEqual", + "!<>", "TokenType.notLessEqualGreater", + "+", "TokenType.plus", + "+=", "TokenType.plusEquals", + "^^", "TokenType.pow", + "^^=", "TokenType.powEquals", + "}", "TokenType.rBrace", + "]", "TokenType.rBracket", + ")", "TokenType.rParen", + ";", "TokenType.semicolon", + "<<", "TokenType.shiftLeft", + "<<=", "TokenType.shiftLeftEqual", + ">>", "TokenType.shiftRight", + ">>=", "TokenType.shiftRightEqual", + "..", "TokenType.slice", + "*", "TokenType.star", + "?", "TokenType.ternary", + "~", "TokenType.tilde", + "--", "TokenType.uMinus", + "!<>=", "TokenType.unordered", + ">>>", "TokenType.unsignedShiftRight", + ">>>=", "TokenType.unsignedShiftRightEqual", + "++", "TokenType.uPlus", + "...", "TokenType.vararg", + "^", "TokenType.xor", + "^=", "TokenType.xorEquals", + )); + + case '0': .. 
case '9': + currentToken.value = lexNumber(inputString, endIndex); + currentToken.type = TokenType.numberLiteral; + currentToken.lineNumber = lineNumber; + break; + case '/': + ++endIndex; + if (endIndex >= inputString.length) + { + currentToken.value = "/"; + currentToken.type = TokenType.div; + currentToken.lineNumber = lineNumber; + break; + } + currentToken.lineNumber = lineNumber; + switch (inputString[endIndex]) + { + case '/': + case '+': + case '*': + if (iterationStyle == IterationStyle.CODE_ONLY) + { + lexComment(inputString, endIndex, lineNumber); + continue; + } + else + { + currentToken.value = lexComment(inputString, endIndex, lineNumber); + currentToken.type = TokenType.comment; + break; + } + case '=': + currentToken.value = "/="; + currentToken.type = TokenType.divEquals; + ++endIndex; + break; + default: + currentToken.value = "/"; + currentToken.type = TokenType.div; + break; + } + break; + case 'r': + currentToken.value = "r"; + ++endIndex; + if (inputString[endIndex] == '\"') + { + currentToken.lineNumber = lineNumber; + currentToken.value = lexString(inputString, endIndex, + lineNumber, inputString[endIndex], false); + currentToken.type = TokenType.stringLiteral; + break; + } + else + goto default; + case '`': + currentToken.lineNumber = lineNumber; + currentToken.value = lexString(inputString, endIndex, lineNumber, + inputString[endIndex], false); + currentToken.type = TokenType.stringLiteral; + break; + case 'x': + currentToken.value = "x"; + ++endIndex; + if (inputString[endIndex] == '\"') + goto case '\"'; + else + goto default; + case '\'': + case '"': + currentToken.lineNumber = lineNumber; + currentToken.value = lexString(inputString, endIndex, lineNumber, + inputString[endIndex]); + currentToken.type = TokenType.stringLiteral; + break; + case 'q': + ++endIndex; + switch (inputString[endIndex]) + { + case '\"': + currentToken.lineNumber = lineNumber; + currentToken.value ~= "q" ~ lexDelimitedString(inputString, + endIndex, lineNumber); 
+ currentToken.type = TokenType.stringLiteral; + break outerSwitch; + case '{': + currentToken.lineNumber = lineNumber; + currentToken.value ~= "q" ~ lexTokenString(inputString, + endIndex, lineNumber); + currentToken.type = TokenType.stringLiteral; + break outerSwitch; + default: + break; + } + goto default; + case '@': + ++endIndex; + goto default; + default: + while(endIndex < inputString.length && !isSeparating(inputString[endIndex])) + ++endIndex; + currentToken.value = inputString[startIndex .. endIndex]; + currentToken.type = lookupTokenType(currentToken.value); + currentToken.lineNumber = lineNumber; + break; + } +// writeln(currentToken); + tokenAppender.put(currentToken); + } + return tokenAppender.data; +} diff --git a/types.d b/types.d new file mode 100644 index 0000000..6a54b2d --- /dev/null +++ b/types.d @@ -0,0 +1,567 @@ +/******************************************************************************* + * The MIT License + * + * Copyright (c) 2012 Brian Schott (Sir Alaran) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modif y, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 ******************************************************************************/

module types;

import std.stdio;
import std.array;
import std.range;
import std.algorithm;
import std.typecons;

/**
 * Returns: s with backslashes, quote characters, newlines, carriage returns
 * and tabs backslash-escaped so that it can be placed inside a JSON string
 */
string escapeJSON(string s)
{
	// Backslashes go first, so that the backslashes introduced by the later
	// replacements are not escaped a second time.
	return s.replace("\\", "\\\\")
		.replace("\"", "\\\"")
		.replace("\n", "\\n")
		.replace("\r", "\\r")
		.replace("\t", "\\t");
}

unittest
{
	assert(escapeJSON("abc\"def") == "abc\\\"def");
	assert(escapeJSON("a\\b") == "a\\\\b");
	assert(escapeJSON("a\nb") == "a\\nb");
}

/**
 * Writes a string in JSON format to the given file
 * Params:
 *     f = the file to write to
 *     name = the name of the json attribute
 *     value = the value of the json attribute
 *     indent = the indent level
 */
void writeJSONString(File f, const string name, const string value, uint indent = 0)
{
	f.write(std.array.replicate(" ", indent), "\"", name, "\" : \"", escapeJSON(value), "\"");
}

/**
 * Writes a string array in JSON format to the given file
 * Params:
 *     f = the file to write to
 *     name = the name of the json attribute
 *     values = the strings that should be written
 *     indent = the indent level
 */
void writeJSONString(File f, const string name, const string[] values, uint indent = 0)
{
	f.writeln(std.array.replicate(" ", indent), "\"", name, "\" : [");
	foreach(i, v; values)
	{
		f.write(std.array.replicate(" ", indent + 1), "\"", escapeJSON(v), "\"");
		// Every element except the last one is followed by a comma.
		if (i + 1 < values.length)
			f.writeln(",");
		else
			f.writeln();
	}
	f.write(std.array.replicate(" ", indent), "]");
}

/**
 * Attributes common to everything interesting
 */
abstract class Base
{
public:

	/// Symbol name
	string name;

	/// Line number of declaration
	uint line;

	/// Attributes such as "ref", "const", etc.
	string[] attributes;

	/// Protection level such as "public", protected, etc.
	string protection;

	/**
	 * Writes this object to f as a JSON object whose members are produced by
	 * printMembers.
	 * See_also: writeJSONString
	 */
	void writeJSONTo(File f, uint indent) const
	{
		f.writeln(std.array.replicate(" ", indent + 1), "{");
		printMembers(f, indent + 2);
		f.write("\n", std.array.replicate(" ", indent + 1), "}");
	}

protected:

	/**
	 * Writes the name, line, protection and attributes members. Subclasses
	 * override this to append their own members after calling the base
	 * implementation.
	 */
	void printMembers(File f, uint indent = 0) const
	{
		writeJSONString(f, "name", name, indent);
		f.writeln(",");
		f.write(std.array.replicate(" ", indent), "\"line\" : ", line);
		f.writeln(",");
		writeJSONString(f, "protection", protection, indent);
		f.writeln(",");
		writeJSONString(f, "attributes", attributes, indent);
	}
}

/**
 * Variable declaration
 */
class Variable : Base
{
public:

	/// Variable type
	string type;

protected:

	/// Appends the "type" member to the members written by Base.
	override void printMembers(File f, uint indent = 0) const
	{
		super.printMembers(f, indent);
		f.writeln(",");
		writeJSONString(f, "type", type, indent);
	}
}

/**
 * Base class for any type that can be a template
 */
abstract class Templateable : Base
{
public:

	/// Template constraint, which may be null
	string constraint;

	/// Template parameters, may be empty
	string[] templateParameters;

protected:

	/// Appends the "constraint" and "templateParameters" members.
	override void printMembers(File f, uint indent = 0) const
	{
		super.printMembers(f, indent);
		f.writeln(",");
		writeJSONString(f, "constraint", constraint, indent);
		f.writeln(",");
		writeJSONString(f, "templateParameters", templateParameters, indent);
	}
}

/**
 * Stuff common to struct, interface, and class.
+ */ +class Struct : Templateable +{ +public: + + /// List of methods + Function[] functions; + + /// List of member variables; may be empty + Variable[] variables; + + /// Source code character position of the beginning of the struct body + size_t bodyStart; + + /// Source code character position of the end of the struct body + size_t bodyEnd; + + string getMemberType(string name) const + { + foreach (f; functions) + if (f.name == name) + return f.returnType; + foreach (v; variables) + if (v.name == name) + return v.type; + return null; + } + +protected: + + override void printMembers(File f, uint indent = 0) const + { + super.printMembers(f, indent); + f.writeln(",\n", std.array.replicate(" ", indent), "\"functions\" : ["); + foreach(i, fun; functions) + { + fun.writeJSONTo(f, indent); + if (i + 1 < functions.length) + f.writeln(","); + else + f.writeln(); + } + f.writeln(std.array.replicate(" ", indent), "],\n", std.array.replicate(" ", indent), "\"variables\" : ["); + foreach(i, var; variables) + { + var.writeJSONTo(f, indent); + if (i + 1 < variables.length) + f.writeln(","); + else + f.writeln(); + } + f.write(std.array.replicate(" ", indent), "]"); + } +} + +/** + * Functions and delegates + */ +class Function : Templateable +{ +public: + + /// Function return type + string returnType; + + /// Parameter list; may be empty + Variable[] parameters; + +protected: + override void printMembers(File f, uint indent) const + { + super.printMembers(f, indent); + f.write(",\n"); + f.writeln(std.array.replicate(" ", indent), "\"parameters\" : ["); + foreach(i, params; parameters) + { + params.writeJSONTo(f, indent); + if (i + 1 < parameters.length) + f.writeln(","); + else + f.writeln(); + } + + f.write(std.array.replicate(" ", indent), "],\n"); + writeJSONString(f, "returnType", returnType, indent); + } +} + +/** + * class and interface + */ +class Inherits : Struct +{ +public: + + /** + * List of interfaces and classes that this inherits or implements; may + * be 
empty + */ + string[] baseClasses; + +protected: + + override void printMembers(File f, uint indent = 0) const + { + super.printMembers(f, indent); + f.writeln(","); + writeJSONString(f, "baseClasses", baseClasses, indent); + } +} + +/** + * enum member + */ +struct EnumMember +{ + uint line; + string name; +} + +/** + * enum + */ +class Enum : Base +{ +public: + + /// Base type for this enum + string type; + + /// Enum members; may be empty + EnumMember[] members; + +protected: + + override void printMembers(File f, uint indent = 0) const + { + super.printMembers(f, indent); + f.writeln(","); + writeJSONString(f, "type", type, indent); + f.writeln(",\n", std.array.replicate(" ", indent), "\"members\" : ["); + foreach(i, member; members) + { + f.writeln(std.array.replicate(" ", indent + 1), "{"); + writeJSONString(f, "name", member.name, indent + 2); + f.writeln(","); + f.writeln(std.array.replicate(" ", indent + 2), "\"line\" : ", member.line); + f.write(std.array.replicate(" ", indent + 1), "}"); + if (i + 1 < members.length) + f.writeln(","); + else + f.writeln(); + } + f.write(std.array.replicate(" ", indent), "]"); + } +} + +/** + * Module is a container class for the other classes + */ +class Module +{ +public: + + /// Module name. 
Will be blank if there is no module statement + string name; + + /// List of interfaces declared in this module + Inherits[] interfaces; + + /// List of classes declared in this module + Inherits[] classes; + + /// List of functions declared in this module + Function[] functions; + + /// List of unions declared in this module + Struct[] unions; + + /// List of variables declared in this module + Variable[] variables; + + /// List of structs declared in this module + Struct[] structs; + + /// List of enums declared in this module + Enum[] enums; + + /// List of other modules that are imported by this one + string[] imports; + + /// Combine this module with another one + void merge(Module other) + { + interfaces.insertInPlace(interfaces.length, other.interfaces); + classes.insertInPlace(classes.length, other.classes); + functions.insertInPlace(functions.length, other.functions); + unions.insertInPlace(unions.length, other.unions); + variables.insertInPlace(variables.length, other.variables); + structs.insertInPlace(structs.length, other.structs); + enums.insertInPlace(enums.length, other.enums); + imports.insertInPlace(imports.length, other.imports); + } + + /** + * Prints a JSON representation of this module to the given file + */ + void writeJSONTo(File f) const + { + uint indent = 0; + f.writeln("{"); + writeJSONString(f, "name", name, indent + 1); + f.writeln(","); + writeJSONString(f, "imports", imports, indent + 1); + f.writeln(",\n \"interfaces\" : ["); + foreach(i, inter; interfaces) + { + inter.writeJSONTo(f, indent + 1); + if (i + 1 < interfaces.length) + f.writeln(","); + else + f.writeln(); + } + f.writeln(" ],\n \"classes\" : ["); + foreach(i, cl; classes) + { + cl.writeJSONTo(f, indent + 1); + if (i + 1 < classes.length) + f.writeln(","); + else + f.writeln(); + } + f.writeln(" ],\n \"structs\" : ["); + foreach(i, str; structs) + { + str.writeJSONTo(f, indent + 1); + if (i + 1 < structs.length) + f.writeln(","); + else + f.writeln(); + } + f.writeln(" 
],\n \"structs\" : ["); + foreach(i, un; unions) + { + un.writeJSONTo(f, indent + 1); + if (i + 1 < unions.length) + f.writeln(","); + else + f.writeln(); + } + f.writeln(" ],\n \"functions\" : ["); + foreach(i, fun; functions) + { + fun.writeJSONTo(f, indent + 1); + if (i + 1 < functions.length) + f.writeln(","); + else + f.writeln(); + } + f.writeln(" ],\n \"variables\" : ["); + foreach(i, var; variables) + { + var.writeJSONTo(f, indent + 1); + if (i + 1 < variables.length) + f.writeln(","); + else + f.writeln(); + } + f.writeln(" ],\n \"enums\" : ["); + foreach(i, en; enums) + { + en.writeJSONTo(f, indent + 1); + if (i + 1 < enums.length) + f.writeln(","); + else + f.writeln(); + } + f.writeln(" ]\n}"); + } +} + +immutable(string[][string]) typeProperties; +immutable(string[]) floatProperties; +immutable(string[]) integralProperties; +immutable(string[]) commonProperties; +immutable(string[]) arrayProperties; + +static this() +{ + floatProperties = ["alignof", "dig", "epsilon", "im", "infinity", "init", + "mangleof", "mant_dig", "max", "max_10_exp", ".max_­exp", "min_10_­exp", + "min_­exp", "min_nor­mal", "nan", "re", "sizeof" + ]; + + integralProperties = ["alignof", "init", "mangleof", "max", + "min", "sizeof", "stringof" + ]; + + commonProperties = [ + "alignof", + "init", + "mangleof", + "stringof" + ]; + + arrayProperties = [ + "alignof", + "init", + "length", + "mangleof", + "ptr", + "stringof", + ]; + + typeProperties = [ + "bool" : commonProperties, + "byte" : integralProperties, + "ubyte" : integralProperties, + "short" : integralProperties, + "ushort" : integralProperties, + "int" : integralProperties, + "uint" : integralProperties, + "long" : integralProperties, + "ulong" : integralProperties, + "cent" : integralProperties, + "ucent" : integralProperties, + "float" : floatProperties, + "dou­ble" : floatProperties, + "real" : floatProperties, + "ifloat" : floatProperties, + "idou­ble" : floatProperties, + "ireal" : floatProperties, + "cfloat" : 
floatProperties, + "cdou­ble" : floatProperties, + "creal" : floatProperties, + "char" : commonProperties, + "wchar" : commonProperties, + "dchar" : commonProperties, + "ptrdiff_t" : integralProperties, + "size_t" : integralProperties, + "string" : arrayProperties, + "wstring" : arrayProperties, + "dstring" : arrayProperties + ]; +} + +class CompletionContext +{ +public: + + this(Module mod) + { + this.currentModule = mod; + } + + Tuple!(string, string)[string] getMembersOfType(string name) + { + foreach (m; chain(modules, [currentModule])) + { + foreach (s; chain(m.structs, m.interfaces, m.classes, m.unions)) + { + if (s.name != name) + continue; + Tuple!(string, string)[string] typeMap; + foreach(var; s.variables) + typeMap[var.name] = Tuple!(string, string)(var.type, "?1"); + foreach(fun; s.functions) + typeMap[fun.name] = Tuple!(string, string)(fun.returnType, "?2"); + return typeMap; + } + foreach (Enum e; m.enums) + { + if (e.name != name) + continue; + Tuple!(string, string)[string] typeMap; + foreach (member; e.members) + typeMap[member.name] = Tuple!(string, string)(e.type, "?1"); + return typeMap; + } + } + return null; + } + + Struct[] getStructsContaining(size_t cursorPosition) + { + auto app = appender!(Struct[])(); + foreach(s; chain(currentModule.structs, currentModule.interfaces, + currentModule.classes, currentModule.unions)) + { + if (s.bodyStart <= cursorPosition && s.bodyEnd >= cursorPosition) + app.put(s); + } + return app.data(); + } + + void addModule(Module mod) + { + modules ~= mod; + } + + Module currentModule; + Module[] modules; +}