From bbd2ec13eab916d2d82cae663f150b015733acdc Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Fri, 27 Apr 2012 02:26:34 -0700 Subject: [PATCH] Added foreach_reverse to list of tokens used for line-of-code count Alphabetized constants Cleaned up constant names Fixed defects with decimal parsing Eliminated iteration style parameter to lexWhitespace, as it didn't really speed things up. Added support for imaginary literals --- autocomplete.d | 66 ++++-- build.sh | 4 +- highlighter.d | 4 +- langutils.d | 623 ++++++++++++++++++++++++------------------------- main.d | 13 +- parser.d | 282 +++++++++++----------- tokenizer.d | 282 ++++++++++++---------- 7 files changed, 676 insertions(+), 598 deletions(-) diff --git a/autocomplete.d b/autocomplete.d index 82b58af..6a4ddaa 100644 --- a/autocomplete.d +++ b/autocomplete.d @@ -32,7 +32,31 @@ immutable string[] versions = ["AIX", "all", "Alpha", "ARM", "BigEndian", "BSD", */ size_t findEndOfExpression(const Token[] tokens, size_t index) { - return index; + size_t i = index; + while (i < tokens.length) + { + switch (tokens[i].type) + { + case TokenType.RBrace: + case TokenType.RParen: + case TokenType.RBracket: + case TokenType.Semicolon: + return i; /* a closing token or ';' ends the expression (a bare break would only leave the switch) */ + case TokenType.LParen: + skipParens(tokens, i); /* pass the cursor i so it moves past the balanced group */ + break; + case TokenType.LBrace: + skipBraces(tokens, i); + break; + case TokenType.LBracket: + skipBrackets(tokens, i); + break; + default: + ++i; + break; + } + } + return i; } size_t findBeginningOfExpression(const Token[] tokens, size_t index) @@ -64,19 +88,19 @@ struct AutoComplete switch (symbol.type) { - case TokenType.floatLiteral: + case TokenType.FloatLiteral: return "float"; - case TokenType.doubleLiteral: + case TokenType.DoubleLiteral: return "double"; - case TokenType.realLiteral: + case TokenType.RealLiteral: return "real"; - case TokenType.intLiteral: + case TokenType.IntLiteral: return "int"; - case TokenType.unsignedIntLiteral: + case TokenType.UnsignedIntLiteral: return "uint"; - case
TokenType.longLiteral: + case TokenType.LongLiteral: return "long"; - case TokenType.unsignedLongLiteral: + case TokenType.UnsignedLongLiteral: return "ulong"; default: break; @@ -92,21 +116,21 @@ struct AutoComplete auto index = preceedingTokens.length - 1; while (true) { - if (preceedingTokens[index] == TokenType.lBrace) + if (preceedingTokens[index] == TokenType.LBrace) --depth; - else if (preceedingTokens[index] == TokenType.rBrace) + else if (preceedingTokens[index] == TokenType.RBrace) ++depth; else if (depth <= 0 && preceedingTokens[index].value == symbol) { // Found the symbol, now determine if it was declared here. auto p = preceedingTokens[index - 1]; - if ((p == TokenType.tAuto || p == TokenType.tImmutable - || p == TokenType.tConst) - && preceedingTokens[index + 1] == TokenType.assign) + if ((p == TokenType.Auto || p == TokenType.Immutable + || p == TokenType.Const) + && preceedingTokens[index + 1] == TokenType.Assign) { return null; } - else if (p == TokenType.identifier + else if (p == TokenType.Identifier || (p.type > TokenType.TYPES_BEGIN && p.type < TokenType.TYPES_END)) { @@ -153,14 +177,14 @@ struct AutoComplete return ""; switch (tokens[index].type) { - case TokenType.tVersion: + case TokenType.Version: return to!string(join(map!`a ~ "?1"`(versions), " ").array()); - case TokenType.tIf: - case TokenType.tCast: - case TokenType.tWhile: - case TokenType.tFor: - case TokenType.tForeach: - case TokenType.tSwitch: + case TokenType.If: + case TokenType.Cast: + case TokenType.While: + case TokenType.For: + case TokenType.Foreach: + case TokenType.Switch: return ""; default: return ""; diff --git a/build.sh b/build.sh index ac9688e..96031ad 100755 --- a/build.sh +++ b/build.sh @@ -1,2 +1,2 @@ -dmd *.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -#dmd *.d -g -unittest -m64 -w -wi -property -ofdscanner +#dmd *.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner +dmd *.d -g -unittest -m64 -w -wi -property -ofdscanner diff --git 
a/highlighter.d b/highlighter.d index 9a9e557..ee43747 100644 --- a/highlighter.d +++ b/highlighter.d @@ -44,10 +44,10 @@ html { background-color: #111; color: #ccc; } case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END: writeSpan("type", t.value); break; - case TokenType.comment: + case TokenType.Comment: writeSpan("comment", t.value); break; - case TokenType.stringLiteral: + case TokenType.STRINGS_BEGIN: .. case TokenType.STRINGS_END: writeSpan("string", t.value); break; case TokenType.NUMBERS_BEGIN: .. case TokenType.NUMBERS_END: diff --git a/langutils.d b/langutils.d index 9c642ba..6ded5e4 100644 --- a/langutils.d +++ b/langutils.d @@ -1,4 +1,3 @@ - // Copyright Brian Schott (Sir Alaran) 2012. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at @@ -41,7 +40,7 @@ pure nothrow TokenType lookupTokenType(const string input) if (type !is null) return *type; else - return TokenType.identifier; + return TokenType.Identifier; } @@ -52,228 +51,226 @@ enum TokenType: uint { // Operators OPERATORS_BEGIN, - div, /// / - divEquals, /// /= - dot, /// . - slice, // .. - vararg, /// ... - bitAnd, /// & - bitAndEquals, /// &= - logicAnd, /// && - bitOr, /// | - bitOrEquals, /// |= - logicOr, /// || - minus, /// - - minusEquals, /// -= - uMinus, /// -- - plus, /// + - plusEquals, /// += - uPlus, /// ++ - less, /// < - lessEqual, /// <= - shiftLeft, /// << - shiftLeftEqual, /// <<= - lessOrGreater, /// <> - lessEqualGreater, // <>= - greater, /// > - greaterEqual, /// >= - shiftRightEqual, /// >>= - unsignedShiftRightEqual, /// >>>= - shiftRight, /// >> - unsignedShiftRight, /// >>> - not, /// ! - notEquals, /// != - notLessEqualGreater, /// !<> - unordered, /// !<>= - notLess, /// !< - notLessEqual, /// !<= - notGreater, /// !> - notGreaterEqual, /// !>= - lParen, /// $(LPAREN) - rParen, /// $(RPAREN) - lBracket, /// [ - rBracket, /// ] - lBrace, /// { - rBrace, /// } - ternary, /// ? 
- comma, /// , - semicolon, /// ; - colon, /// : - dollar, /// $ - assign, /// = - equals, /// == - star, /// * - mulEquals, /// *= - mod, /// % - modEquals, /// %= - xor, /// ^ - xorEquals, /// ^= - pow, /// ^^ - powEquals, /// ^^= - tilde, /// ~ - catEquals, /// ~= - hash, // # - goesTo, // => + Assign, /// = + BitAnd, /// & + BitAndEquals, /// &= + BitOr, /// | + BitOrEquals, /// |= + CatEquals, /// ~= + Colon, /// : + Comma, /// , + Decrement, /// -- + Div, /// / + DivEquals, /// /= + Dollar, /// $ + Dot, /// . + Equals, /// == + GoesTo, // => + Greater, /// > + GreaterEqual, /// >= + Hash, // # + Increment, /// ++ + LBrace, /// { + LBracket, /// [ + Less, /// < + LessEqual, /// <= + LessEqualGreater, // <>= + LessOrGreater, /// <> + LogicAnd, /// && + LogicOr, /// || + LParen, /// $(LPAREN) + Minus, /// - + MinusEquals, /// -= + Mod, /// % + ModEquals, /// %= + MulEquals, /// *= + Not, /// ! + NotEquals, /// != + NotGreater, /// !> + NotGreaterEqual, /// !>= + NotLess, /// !< + NotLessEqual, /// !<= + NotLessEqualGreater, /// !<> + Plus, /// + + PlusEquals, /// += + Pow, /// ^^ + PowEquals, /// ^^= + RBrace, /// } + RBracket, /// ] + RParen, /// $(RPAREN) + Semicolon, /// ; + ShiftLeft, /// << + ShiftLeftEqual, /// <<= + ShiftRight, /// >> + ShiftRightEqual, /// >>= + Slice, // .. + Star, /// * + Ternary, /// ? + Tilde, /// ~ + Unordered, /// !<>= + UnsignedShiftRight, /// >>> + UnsignedShiftRightEqual, /// >>>= + Vararg, /// ... 
+ Xor, /// ^ + XorEquals, /// ^= OPERATORS_END, -// Types + // Types TYPES_BEGIN, - tString, /// string - tWString, /// wstring - tDString, /// dstring - tBool, /// bool, - tByte, /// byte, - tCdouble, /// cdouble, - tCent, /// cent, - tCfloat, /// cfloat, - tChar, /// char, - tCreal, /// creal, - tDchar, /// dchar, - tDouble, /// double, - tFloat, /// float, - tUbyte, /// ubyte, - tUcent, /// ucent, - tUint, /// uint, - tUlong, /// ulong, - tShort, /// short, - tReal, /// real, - tLong, /// long, - tInt, /// int, - tFunction, /// function, - tIdouble, /// idouble, - tIreal, /// ireal, - tWchar, /// wchar, - tVoid, /// void, - tUshort, /// ushort, - tIfloat, /// if loat, + Bool, /// bool, + Byte, /// byte, + Cdouble, /// cdouble, + Cent, /// cent, + Cfloat, /// cfloat, + Char, /// char, + Creal, /// creal, + Dchar, /// dchar, + Double, /// double, + DString, /// dstring + Float, /// float, + Function, /// function, + Idouble, /// idouble, + Ifloat, /// ifloat, + Int, /// int, + Ireal, /// ireal, + Long, /// long, + Real, /// real, + Short, /// short, + String, /// string + Ubyte, /// ubyte, + Ucent, /// ucent, + Uint, /// uint, + Ulong, /// ulong, + Ushort, /// ushort, + Void, /// void, + Wchar, /// wchar, + WString, /// wstring TYPES_END, - tTemplate, /// template, + Template, /// template, -// Keywords + // Keywords KEYWORDS_BEGIN, ATTRIBUTES_BEGIN, - tExtern, /// extern, - tAlign, /// align, - tPragma, /// pragma, - tDeprecated, /// deprecated, + Align, /// align, + Deprecated, /// deprecated, + Extern, /// extern, + Pragma, /// pragma, PROTECTION_BEGIN, - tPackage, /// package, - tPrivate, /// private, - tProtected, /// protected, - tPublic, /// public, - tExport, /// export, + Export, /// export, + Package, /// package, + Private, /// private, + Protected, /// protected, + Public, /// public, PROTECTION_END, - tStatic, /// static, - tSynchronized, /// synchronized, - tFinal, /// final - tAbstract, /// abstract, - tConst, /// const, - tAuto, /// auto, - tScope, 
/// scope, - t__gshared, /// __gshared, - tShared, // shared, - tImmutable, // immutable, - tInout, // inout, - atDisable, /// @disable + Abstract, /// abstract, + AtDisable, /// @disable + Auto, /// auto, + Const, /// const, + Final, /// final + Gshared, /// __gshared, + Immutable, // immutable, + Inout, // inout, + Scope, /// scope, + Shared, // shared, + Static, /// static, + Synchronized, /// synchronized, ATTRIBUTES_END, - tAlias, /// alias, - tAsm, /// asm, - tAssert, /// assert, - tBody, /// body, - tBreak, /// break, - tCase, /// case, - tCast, /// cast, - tCatch, /// catch, - tClass, /// class, - tContinue, /// continue, - tDebug, /// debug, - tDefault, /// default, - tDelegate, /// delegate, - tDelete, /// delete, - tDo, /// do, - tElse, /// else, - tEnum, /// enum, - tFalse, /// false, - tFinally, /// finally, - tFor, /// for, - tForeach, /// foreach, - tForeach_reverse, /// foreach_reverse, - tGoto, /// goto, - tIf, /// if , - tImport, /// import, - tIn, /// in, - tInterface, /// interface, - tInvariant, /// invariant, - tIs, /// is, - tLazy, /// lazy, - tMacro, /// macro, - tMixin, /// mixin, - tModule, /// module, - tNew, /// new, - tNothrow, /// nothrow, - tNull, /// null, - tOut, /// out, - tOverride, /// override, - tPure, /// pure, - tRef, /// ref, - tReturn, /// return, - tStruct, /// struct, - tSuper, /// super, - tSwitch, /// switch , - tThis, /// this, - tThrow, /// throw, - tTrue, /// true, - tTry, /// try, - tTypedef, /// typedef, - tTypeid, /// typeid, - tTypeof, /// typeof, - tUnion, /// union, - tUnittest, /// unittest, - tVersion, /// version, - tVolatile, /// volatile, - tWhile, /// while , - tWith, /// with, + Alias, /// alias, + Asm, /// asm, + Assert, /// assert, + Body, /// body, + Break, /// break, + Case, /// case, + Cast, /// cast, + Catch, /// catch, + Class, /// class, + Continue, /// continue, + Debug, /// debug, + Default, /// default, + Delegate, /// delegate, + Delete, /// delete, + Do, /// do, + Else, /// else, + Enum, /// 
enum, + False, /// false, + Finally, /// finally, + Foreach, /// foreach, + Foreach_reverse, /// foreach_reverse, + For, /// for, + Goto, /// goto, + If, /// if , + Import, /// import, + In, /// in, + Interface, /// interface, + Invariant, /// invariant, + Is, /// is, + Lazy, /// lazy, + Macro, /// macro, + Mixin, /// mixin, + Module, /// module, + New, /// new, + Nothrow, /// nothrow, + Null, /// null, + Out, /// out, + Override, /// override, + Pure, /// pure, + Ref, /// ref, + Return, /// return, + Struct, /// struct, + Super, /// super, + Switch, /// switch , + This, /// this, + Throw, /// throw, + True, /// true, + Try, /// try, + Typedef, /// typedef, + Typeid, /// typeid, + Typeof, /// typeof, + Union, /// union, + Unittest, /// unittest, + Version, /// version, + Volatile, /// volatile, + While, /// while , + With, /// with, KEYWORDS_END, // Constants CONSTANTS_BEGIN, - t__FILE__, /// __FILE__, - t__LINE__, /// __LINE__, - - t__thread, /// __thread, - t__traits, /// __traits, + File, /// __FILE__, + Line, /// __LINE__, + Thread, /// __thread, + Traits, /// __traits, CONSTANTS_END, // Properties PROPERTIES_BEGIN, - - atProperty, /// @property - atSafe, /// @safe - atSystem, /// @system - atTrusted, /// @trusted + AtProperty, /// @property + AtSafe, /// @safe + AtSystem, /// @system + AtTrusted, /// @trusted PROPERTIES_END, // Misc MISC_BEGIN, - scriptLine, // Line at the beginning of source file that starts from #! - comment, /// /** comment */ or // comment or ///comment + Blank, /// unknown token type + Comment, /// /** comment */ or // comment or ///comment + Identifier, /// anything else + ScriptLine, // Line at the beginning of source file that starts from #! 
+ Whitespace, /// whitespace NUMBERS_BEGIN, - floatLiteral, /// 123.456f or 0x123_45p-af - doubleLiteral, /// 123.456 - realLiteral, /// 123.456L - intLiteral, /// 123 or 0b1101010101 - unsignedIntLiteral, /// 123u - longLiteral, /// 123L - unsignedLongLiteral, /// 123uL + DoubleLiteral, /// 123.456 + FloatLiteral, /// 123.456f or 0x123_45p-af + IntLiteral, /// 123 or 0b1101010101 + LongLiteral, /// 123L + RealLiteral, /// 123.456L + UnsignedIntLiteral, /// 123u + UnsignedLongLiteral, /// 123uL NUMBERS_END, STRINGS_BEGIN, - stringLiteral, /// "a string" - wStringLiteral, /// "16-bit character string"w - dStringLiteral, /// "32-bit character string"d + DStringLiteral, /// "32-bit character string"d + StringLiteral, /// "a string" + WStringLiteral, /// "16-bit character string"w STRINGS_END, - identifier, /// anything else - whitespace, /// whitespace - blank, /// unknown token type MISC_END, } @@ -287,121 +284,121 @@ immutable TokenType[string] tokenLookup; static this() { tokenLookup = [ - "abstract" : TokenType.tAbstract, - "alias" : TokenType.tAlias, - "align" : TokenType.tAlign, - "asm" : TokenType.tAsm, - "assert" : TokenType.tAssert, - "auto" : TokenType.tAuto, - "body" : TokenType.tBody, - "bool" : TokenType.tBool, - "break" : TokenType.tBreak, - "byte" : TokenType.tByte, - "case" : TokenType.tCase, - "cast" : TokenType.tCast, - "catch" : TokenType.tCatch, - "cdouble" : TokenType.tCdouble, - "cent" : TokenType.tCent, - "cfloat" : TokenType.tCfloat, - "char" : TokenType.tChar, - "class" : TokenType.tClass, - "const" : TokenType.tConst, - "continue" : TokenType.tContinue, - "creal" : TokenType.tCreal, - "dchar" : TokenType.tDchar, - "debug" : TokenType.tDebug, - "default" : TokenType.tDefault, - "delegate" : TokenType.tDelegate, - "delete" : TokenType.tDelete, - "deprecated" : TokenType.tDeprecated, - "do" : TokenType.tDo, - "double" : TokenType.tDouble, - "dstring" : TokenType.tDString, - "else" : TokenType.tElse, - "enum" : TokenType.tEnum, - "export" : 
TokenType.tExport, - "extern" : TokenType.tExtern, - "false" : TokenType.tFalse, - "final" : TokenType.tFinal, - "finally" : TokenType.tFinally, - "float" : TokenType.tFloat, - "for" : TokenType.tFor, - "foreach" : TokenType.tForeach, - "foreach_reverse" : TokenType.tForeach_reverse, - "function" : TokenType.tFunction, - "goto" : TokenType.tGoto, - "idouble" : TokenType.tIdouble, - "if" : TokenType.tIf, - "ifloat" : TokenType.tIfloat, - "immutable" : TokenType.tImmutable, - "import" : TokenType.tImport, - "in" : TokenType.tIn, - "inout" : TokenType.tInout, - "int" : TokenType.tInt, - "interface" : TokenType.tInterface, - "invariant" : TokenType.tInvariant, - "ireal" : TokenType.tIreal, - "is" : TokenType.tIs, - "lazy" : TokenType.tLazy, - "long" : TokenType.tLong, - "macro" : TokenType.tMacro, - "mixin" : TokenType.tMixin, - "module" : TokenType.tModule, - "new" : TokenType.tNew, - "nothrow" : TokenType.tNothrow, - "null" : TokenType.tNull, - "out" : TokenType.tOut, - "override" : TokenType.tOverride, - "package" : TokenType.tPackage, - "pragma" : TokenType.tPragma, - "private" : TokenType.tPrivate, - "protected" : TokenType.tProtected, - "public" : TokenType.tPublic, - "pure" : TokenType.tPure, - "real" : TokenType.tReal, - "ref" : TokenType.tRef, - "return" : TokenType.tReturn, - "scope" : TokenType.tScope, - "shared" : TokenType.tShared, - "short" : TokenType.tShort, - "static" : TokenType.tStatic, - "struct" : TokenType.tStruct, - "string" : TokenType.tString, - "super" : TokenType.tSuper, - "switch" : TokenType.tSwitch, - "synchronized" : TokenType.tSynchronized, - "template" : TokenType.tTemplate, - "this" : TokenType.tThis, - "throw" : TokenType.tThrow, - "true" : TokenType.tTrue, - "try" : TokenType.tTry, - "typedef" : TokenType.tTypedef, - "typeid" : TokenType.tTypeid, - "typeof" : TokenType.tTypeof, - "ubyte" : TokenType.tUbyte, - "ucent" : TokenType.tUcent, - "uint" : TokenType.tUint, - "ulong" : TokenType.tUlong, - "union" : TokenType.tUnion, - 
"unittest" : TokenType.tUnittest, - "ushort" : TokenType.tUshort, - "version" : TokenType.tVersion, - "void" : TokenType.tVoid, - "volatile" : TokenType.tVolatile, - "wchar" : TokenType.tWchar, - "while" : TokenType.tWhile, - "with" : TokenType.tWith, - "wstring" : TokenType.tWString, - "__FILE__" : TokenType.t__FILE__, - "__LINE__" : TokenType.t__LINE__, - "__gshared" : TokenType.t__gshared, - "__thread" : TokenType.t__thread, - "__traits" : TokenType.t__traits, - "@disable" : TokenType.atDisable, - "@property" : TokenType.atProperty, - "@safe" : TokenType.atSafe, - "@system" : TokenType.atSystem, - "@trusted" : TokenType.atTrusted, + "abstract" : TokenType.Abstract, + "alias" : TokenType.Alias, + "align" : TokenType.Align, + "asm" : TokenType.Asm, + "assert" : TokenType.Assert, + "auto" : TokenType.Auto, + "body" : TokenType.Body, + "bool" : TokenType.Bool, + "break" : TokenType.Break, + "byte" : TokenType.Byte, + "case" : TokenType.Case, + "cast" : TokenType.Cast, + "catch" : TokenType.Catch, + "cdouble" : TokenType.Cdouble, + "cent" : TokenType.Cent, + "cfloat" : TokenType.Cfloat, + "char" : TokenType.Char, + "class" : TokenType.Class, + "const" : TokenType.Const, + "continue" : TokenType.Continue, + "creal" : TokenType.Creal, + "dchar" : TokenType.Dchar, + "debug" : TokenType.Debug, + "default" : TokenType.Default, + "delegate" : TokenType.Delegate, + "delete" : TokenType.Delete, + "deprecated" : TokenType.Deprecated, + "@disable" : TokenType.AtDisable, + "do" : TokenType.Do, + "double" : TokenType.Double, + "dstring" : TokenType.DString, + "else" : TokenType.Else, + "enum" : TokenType.Enum, + "export" : TokenType.Export, + "extern" : TokenType.Extern, + "false" : TokenType.False, + "__FILE__" : TokenType.File, + "finally" : TokenType.Finally, + "final" : TokenType.Final, + "float" : TokenType.Float, + "foreach_reverse" : TokenType.Foreach_reverse, + "foreach" : TokenType.Foreach, + "for" : TokenType.For, + "function" : TokenType.Function, + "goto" : 
TokenType.Goto, + "__gshared" : TokenType.Gshared, + "idouble" : TokenType.Idouble, + "ifloat" : TokenType.Ifloat, + "if" : TokenType.If, + "immutable" : TokenType.Immutable, + "import" : TokenType.Import, + "inout" : TokenType.Inout, + "interface" : TokenType.Interface, + "in" : TokenType.In, + "int" : TokenType.Int, + "invariant" : TokenType.Invariant, + "ireal" : TokenType.Ireal, + "is" : TokenType.Is, + "lazy" : TokenType.Lazy, + "__LINE__" : TokenType.Line, + "long" : TokenType.Long, + "macro" : TokenType.Macro, + "mixin" : TokenType.Mixin, + "module" : TokenType.Module, + "new" : TokenType.New, + "nothrow" : TokenType.Nothrow, + "null" : TokenType.Null, + "out" : TokenType.Out, + "override" : TokenType.Override, + "package" : TokenType.Package, + "pragma" : TokenType.Pragma, + "private" : TokenType.Private, + "@property" : TokenType.AtProperty, + "protected" : TokenType.Protected, + "public" : TokenType.Public, + "pure" : TokenType.Pure, + "real" : TokenType.Real, + "ref" : TokenType.Ref, + "return" : TokenType.Return, + "@safe" : TokenType.AtSafe, + "scope" : TokenType.Scope, + "shared" : TokenType.Shared, + "short" : TokenType.Short, + "static" : TokenType.Static, + "string" : TokenType.String, + "struct" : TokenType.Struct, + "super" : TokenType.Super, + "switch" : TokenType.Switch, + "synchronized" : TokenType.Synchronized, + "@system" : TokenType.AtSystem, + "template" : TokenType.Template, + "this" : TokenType.This, + "__thread" : TokenType.Thread, + "throw" : TokenType.Throw, + "__traits" : TokenType.Traits, + "true" : TokenType.True, + "@trusted" : TokenType.AtTrusted, + "try" : TokenType.Try, + "typedef" : TokenType.Typedef, + "typeid" : TokenType.Typeid, + "typeof" : TokenType.Typeof, + "ubyte" : TokenType.Ubyte, + "ucent" : TokenType.Ucent, + "uint" : TokenType.Uint, + "ulong" : TokenType.Ulong, + "union" : TokenType.Union, + "unittest" : TokenType.Unittest, + "ushort" : TokenType.Ushort, + "version" : TokenType.Version, + "void" : TokenType.Void, 
+ "volatile" : TokenType.Volatile, + "wchar" : TokenType.Wchar, + "while" : TokenType.While, + "with" : TokenType.With, + "wstring" : TokenType.WString, ]; } diff --git a/main.d b/main.d index ec14821..8d6e78b 100644 --- a/main.d +++ b/main.d @@ -27,12 +27,13 @@ pure bool isLineOfCode(TokenType t) { switch(t) { - case TokenType.semicolon: - case TokenType.tWhile: - case TokenType.tIf: - case TokenType.tFor: - case TokenType.tForeach: - case TokenType.tCase: + case TokenType.Semicolon: + case TokenType.While: + case TokenType.If: + case TokenType.For: + case TokenType.Foreach: + case TokenType.Foreach_reverse: + case TokenType.Case: return true; default: return false; diff --git a/parser.d b/parser.d index 71d061b..982ec7f 100644 --- a/parser.d +++ b/parser.d @@ -51,7 +51,7 @@ body */ const(Token)[] betweenBalancedBraces(const Token[] tokens, ref size_t index) { - return betweenBalanced(tokens, index, TokenType.lBrace, TokenType.rBrace); + return betweenBalanced(tokens, index, TokenType.LBrace, TokenType.RBrace); } @@ -60,7 +60,7 @@ const(Token)[] betweenBalancedBraces(const Token[] tokens, ref size_t index) */ const(Token)[] betweenBalancedParens(const Token[] tokens, ref size_t index) { - return betweenBalanced(tokens, index, TokenType.lParen, TokenType.rParen); + return betweenBalanced(tokens, index, TokenType.LParen, TokenType.RParen); } @@ -69,20 +69,27 @@ const(Token)[] betweenBalancedParens(const Token[] tokens, ref size_t index) */ const(Token)[] betweenBalancedBrackets(const Token[] tokens, ref size_t index) { - return betweenBalanced(tokens, index, TokenType.lBracket, TokenType.rBracket); + return betweenBalanced(tokens, index, TokenType.LBracket, TokenType.RBracket); } -void skipBalanced(alias Op, alias Cl)(const Token[] tokens, ref size_t index) + +/** + * If tokens[index] is currently openToken, advances index until it refers to a + * location in tokens directly after the balanced occurance of closeToken. 
If + * tokens[index] is closeToken, decrements index + * + */ +void skipBalanced(alias openToken, alias closeToken)(const Token[] tokens, ref size_t index) { - int depth = tokens[index] == Op ? 1 : -1; + int depth = tokens[index] == openToken ? 1 : -1; int deltaIndex = depth; index += deltaIndex; for (; index < tokens.length && index > 0 && depth != 0; index += deltaIndex) { switch (tokens[index].type) { - case Op: ++depth; break; - case Cl: --depth; break; + case openToken: ++depth; break; + case closeToken: --depth; break; default: break; } } @@ -90,12 +97,17 @@ void skipBalanced(alias Op, alias Cl)(const Token[] tokens, ref size_t index) void skipParens(const Token[] tokens, ref size_t index) { - skipBalanced!(TokenType.lParen, TokenType.rParen)(tokens, index); + skipBalanced!(TokenType.LParen, TokenType.RParen)(tokens, index); } void skipBrackets(const Token[] tokens, ref size_t index) { - skipBalanced!(TokenType.lBracket, TokenType.rBracket)(tokens, index); + skipBalanced!(TokenType.LBracket, TokenType.RBracket)(tokens, index); +} + +void skipBraces(const Token[] tokens, ref size_t index) +{ + skipBalanced!(TokenType.LBrace, TokenType.RBrace)(tokens, index); } /** @@ -122,7 +134,7 @@ body { if (tokens[index] == open) ++depth; else if (tokens[index] == close) --depth; - else if (tokens[index] == TokenType.comma) + else if (tokens[index] == TokenType.Comma) { app.put(", "); } @@ -139,7 +151,7 @@ body */ string parenContent(const Token[]tokens, ref size_t index) { - return "(" ~ content(tokens, index, TokenType.lParen, TokenType.rParen) ~ ")"; + return "(" ~ content(tokens, index, TokenType.LParen, TokenType.RParen) ~ ")"; } @@ -148,7 +160,7 @@ string parenContent(const Token[]tokens, ref size_t index) */ string bracketContent(const Token[]tokens, ref size_t index) { - return "[" ~ content(tokens, index, TokenType.lBracket, TokenType.rBracket) ~ "]"; + return "[" ~ content(tokens, index, TokenType.LBracket, TokenType.RBracket) ~ "]"; } @@ -159,11 +171,11 @@ 
string bracketContent(const Token[]tokens, ref size_t index) */ void skipBlockStatement(const Token[] tokens, ref size_t index) { - if (tokens[index] == TokenType.lBrace) + if (tokens[index] == TokenType.LBrace) betweenBalancedBraces(tokens, index); else { - skipPastNext(tokens, TokenType.semicolon, index); + skipPastNext(tokens, TokenType.Semicolon, index); } } @@ -177,11 +189,11 @@ void skipPastNext(const Token[] tokens, TokenType type, ref size_t index) { while (index < tokens.length) { - if (tokens[index].type == TokenType.lBrace) + if (tokens[index].type == TokenType.LBrace) betweenBalancedBraces(tokens, index); - else if (tokens[index].type == TokenType.lParen) + else if (tokens[index].type == TokenType.LParen) betweenBalancedParens(tokens, index); - else if (tokens[index].type == TokenType.lBracket) + else if (tokens[index].type == TokenType.LBracket) betweenBalancedBrackets(tokens, index); else if (tokens[index].type == type) { @@ -200,18 +212,18 @@ string parseTypeDeclaration(const Token[] tokens, ref size_t index) { switch (tokens[index].type) { - case TokenType.lBracket: + case TokenType.LBracket: type ~= bracketContent(tokens, index); break; - case TokenType.not: + case TokenType.Not: type ~= tokens[index++].value; - if (tokens[index] == TokenType.lParen) + if (tokens[index] == TokenType.LParen) type ~= parenContent(tokens, index); else type ~= tokens[index++].value; break; - case TokenType.star: - case TokenType.bitAnd: + case TokenType.Star: + case TokenType.BitAnd: type ~= tokens[index++].value; break; default: @@ -249,72 +261,72 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] { switch(tokens[index].type) { - case TokenType.tElse: - case TokenType.tMixin: - case TokenType.tAssert: + case TokenType.Else: + case TokenType.Mixin: + case TokenType.Assert: ++index; tokens.skipBlockStatement(index); break; - case TokenType.tAlias: + case TokenType.Alias: tokens.skipBlockStatement(index); break; - case TokenType.tImport: + 
case TokenType.Import: mod.imports ~= parseImports(tokens, index); resetLocals(); break; - case TokenType.tVersion: + case TokenType.Version: ++index; - if (tokens[index] == TokenType.lParen) + if (tokens[index] == TokenType.LParen) { tokens.betweenBalancedParens(index); - if (tokens[index] == TokenType.lBrace) + if (tokens[index] == TokenType.LBrace) mod.merge(parseModule(betweenBalancedBraces(tokens, index), localProtection.empty() ? protection : localProtection, attributes)); } - else if (tokens[index] == TokenType.assign) + else if (tokens[index] == TokenType.Assign) tokens.skipBlockStatement(index); break; - case TokenType.tDeprecated: - case TokenType.tNothrow: - case TokenType.tOverride: - case TokenType.tSynchronized: - case TokenType.atDisable: - case TokenType.atProperty: - case TokenType.atSafe: - case TokenType.atSystem: - case TokenType.tAbstract: - case TokenType.tFinal: - case TokenType.t__gshared: - case TokenType.tStatic: + case TokenType.Deprecated: + case TokenType.Nothrow: + case TokenType.Override: + case TokenType.Synchronized: + case TokenType.AtDisable: + case TokenType.AtProperty: + case TokenType.AtSafe: + case TokenType.AtSystem: + case TokenType.Abstract: + case TokenType.Final: + case TokenType.Gshared: + case TokenType.Static: localAttributes ~= tokens[index++].value; break; - case TokenType.tConst: - case TokenType.tImmutable: - case TokenType.tInout: - case TokenType.tPure: - case TokenType.tScope: - case TokenType.tShared: + case TokenType.Const: + case TokenType.Immutable: + case TokenType.Inout: + case TokenType.Pure: + case TokenType.Scope: + case TokenType.Shared: auto tmp = tokens[index++].value; - if (tokens[index] == TokenType.lParen) + if (tokens[index] == TokenType.LParen) type = tmp ~ parenContent(tokens, index); - else if (tokens[index] == TokenType.colon) + else if (tokens[index] == TokenType.Colon) { index++; attributes ~= tmp; } localAttributes ~= tmp; break; - case TokenType.tAlign: - case TokenType.tExtern: + case 
TokenType.Align: + case TokenType.Extern: string attribute = tokens[index++].value; - if (tokens[index] == TokenType.lParen) + if (tokens[index] == TokenType.LParen) attribute ~= parenContent(tokens, index); - if (tokens[index] == TokenType.lBrace) + if (tokens[index] == TokenType.LBrace) mod.merge(parseModule(betweenBalancedBraces(tokens, index), localProtection.empty() ? protection : localProtection, attributes ~ attribute)); - else if (tokens[index] == TokenType.colon) + else if (tokens[index] == TokenType.Colon) { ++index; attributes ~= attribute; @@ -324,66 +336,66 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] break; case TokenType.PROTECTION_BEGIN: .. case TokenType.PROTECTION_END: string p = tokens[index++].value; - if (tokens[index] == TokenType.colon) + if (tokens[index] == TokenType.Colon) { protection = p; ++index; } - else if (tokens[index] == TokenType.lBrace) + else if (tokens[index] == TokenType.LBrace) mod.merge(parseModule(betweenBalancedBraces(tokens, index), p, attributes ~ localAttributes)); else localProtection = p; break; - case TokenType.tModule: + case TokenType.Module: ++index; - while (index < tokens.length && tokens[index] != TokenType.semicolon) + while (index < tokens.length && tokens[index] != TokenType.Semicolon) mod.name ~= tokens[index++].value; ++index; resetLocals(); break; - case TokenType.tUnion: + case TokenType.Union: mod.unions ~= parseUnion(tokens, index, localProtection.empty() ? protection : localProtection, localAttributes ~ attributes); resetLocals(); break; - case TokenType.tClass: + case TokenType.Class: mod.classes ~= parseClass(tokens, index, localProtection.empty() ? protection : localProtection, localAttributes ~ attributes); resetLocals(); break; - case TokenType.tInterface: + case TokenType.Interface: mod.interfaces ~= parseInterface(tokens, index, localProtection.empty() ? 
protection : localProtection, localAttributes ~ attributes); resetLocals(); break; - case TokenType.tStruct: + case TokenType.Struct: mod.structs ~= parseStruct(tokens, index, localProtection.empty() ? protection : localProtection, localAttributes ~ attributes); resetLocals(); break; - case TokenType.tEnum: + case TokenType.Enum: mod.enums ~= parseEnum(tokens, index, localProtection.empty() ? protection : localProtection, localAttributes ~ attributes); resetLocals(); break; - case TokenType.tTemplate: + case TokenType.Template: ++index; // template ++index; // name - if (tokens[index] == TokenType.lParen) + if (tokens[index] == TokenType.LParen) tokens.betweenBalancedParens(index); // params - if (tokens[index] == TokenType.lBrace) + if (tokens[index] == TokenType.LBrace) tokens.betweenBalancedBraces(index); // body resetLocals(); break; case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END: - case TokenType.tAuto: - case TokenType.identifier: + case TokenType.Auto: + case TokenType.Identifier: if (type.empty()) { type = tokens.parseTypeDeclaration(index); @@ -392,7 +404,7 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] { name = tokens[index++].value; if (index >= tokens.length) break; - if (tokens[index] == TokenType.lParen) + if (tokens[index] == TokenType.LParen) { mod.functions ~= parseFunction(tokens, index, type, name, tokens[index].lineNumber, @@ -412,23 +424,23 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] resetLocals(); } break; - case TokenType.tUnittest: + case TokenType.Unittest: ++index; - if (!tokens.empty() && tokens[index] == TokenType.lBrace) + if (!tokens.empty() && tokens[index] == TokenType.LBrace) tokens.skipBlockStatement(index); resetLocals(); break; - case TokenType.tilde: + case TokenType.Tilde: ++index; - if (tokens[index] == TokenType.tThis) + if (tokens[index] == TokenType.This) { name = "~"; goto case; } break; - case TokenType.tThis: + case TokenType.This: name 
~= tokens[index++].value; - if (tokens[index] == TokenType.lParen) + if (tokens[index] == TokenType.LParen) { mod.functions ~= parseFunction(tokens, index, "", name, tokens[index - 1].lineNumber, @@ -453,7 +465,7 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] */ string[] parseImports(const Token[] tokens, ref size_t index) { - assert(tokens[index] == TokenType.tImport); + assert(tokens[index] == TokenType.Import); ++index; auto app = appender!(string[])(); string im; @@ -461,17 +473,17 @@ string[] parseImports(const Token[] tokens, ref size_t index) { switch(tokens[index].type) { - case TokenType.comma: + case TokenType.Comma: ++index; app.put(im); im = ""; break; - case TokenType.assign: - case TokenType.semicolon: + case TokenType.Assign: + case TokenType.Semicolon: app.put(im); ++index; return app.data; - case TokenType.colon: + case TokenType.Colon: app.put(im); tokens.skipBlockStatement(index); return app.data; @@ -491,7 +503,7 @@ Enum parseEnum(const Token[] tokens, ref size_t index, string protection, string[] attributes) in { - assert (tokens[index] == TokenType.tEnum); + assert (tokens[index] == TokenType.Enum); } body { @@ -500,7 +512,7 @@ body e.line = tokens[index].lineNumber; e.name = tokens[index++].value; - if (tokens[index] == TokenType.colon) + if (tokens[index] == TokenType.Colon) { ++index; e.type = tokens[index++].value; @@ -508,7 +520,7 @@ body else e.type = "uint"; - if (tokens[index] != TokenType.lBrace) + if (tokens[index] != TokenType.LBrace) { tokens.skipBlockStatement(index); return e; @@ -517,13 +529,13 @@ body auto r = betweenBalancedBraces(tokens, index); for (size_t i = 0; i < r.length;) { - if (r[i].type == TokenType.identifier) + if (r[i].type == TokenType.Identifier) { EnumMember member; member.line = r[i].lineNumber; member.name = r[i].value; e.members ~= member; - r.skipPastNext(TokenType.comma, i); + r.skipPastNext(TokenType.Comma, i); } else ++i; @@ -539,7 +551,7 @@ Function 
parseFunction(const Token[] tokens, ref size_t index, string type, string name, uint line, string protection, string[] attributes) in { - assert (tokens[index] == TokenType.lParen); + assert (tokens[index] == TokenType.LParen); } body { @@ -550,7 +562,7 @@ body f.attributes.insertInPlace(f.attributes.length, attributes); Variable[] vars1 = parseParameters(tokens, index); - if (tokens[index] == TokenType.lParen) + if (tokens[index] == TokenType.LParen) { f.templateParameters.insertInPlace(f.templateParameters.length, map!("a.type")(vars1)); @@ -564,14 +576,14 @@ body { switch (tokens[index].type) { - case TokenType.tImmutable: - case TokenType.tConst: - case TokenType.tPure: - case TokenType.atTrusted: - case TokenType.atProperty: - case TokenType.tNothrow: - case TokenType.tFinal: - case TokenType.tOverride: + case TokenType.Immutable: + case TokenType.Const: + case TokenType.Pure: + case TokenType.AtTrusted: + case TokenType.AtProperty: + case TokenType.Nothrow: + case TokenType.Final: + case TokenType.Override: f.attributes ~= tokens[index++].value; break; default: @@ -579,21 +591,21 @@ body } } - if (tokens[index] == TokenType.tIf) + if (tokens[index] == TokenType.If) f.constraint = parseConstraint(tokens, index); while (index < tokens.length && - (tokens[index] == TokenType.tIn || tokens[index] == TokenType.tOut - || tokens[index] == TokenType.tBody)) + (tokens[index] == TokenType.In || tokens[index] == TokenType.Out + || tokens[index] == TokenType.Body)) { ++index; - if (index < tokens.length && tokens[index] == TokenType.lBrace) + if (index < tokens.length && tokens[index] == TokenType.LBrace) tokens.skipBlockStatement(index); } if (index >= tokens.length) return f; - if (tokens[index] == TokenType.lBrace) + if (tokens[index] == TokenType.LBrace) tokens.skipBlockStatement(index); - else if (tokens[index] == TokenType.semicolon) + else if (tokens[index] == TokenType.Semicolon) ++index; return f; } @@ -601,16 +613,16 @@ body string parseConstraint(const Token[] 
tokens, ref size_t index) { auto appender = appender!(string)(); - assert(tokens[index] == TokenType.tIf); + assert(tokens[index] == TokenType.If); appender.put(tokens[index++].value); - assert(tokens[index] == TokenType.lParen); + assert(tokens[index] == TokenType.LParen); return "if " ~ parenContent(tokens, index); } Variable[] parseParameters(const Token[] tokens, ref size_t index) in { - assert (tokens[index] == TokenType.lParen); + assert (tokens[index] == TokenType.LParen); } body { @@ -622,28 +634,28 @@ body { switch(r[i].type) { - case TokenType.tIn: - case TokenType.tOut: - case TokenType.tRef: - case TokenType.tScope: - case TokenType.tLazy: - case TokenType.tConst: - case TokenType.tImmutable: - case TokenType.tShared: - case TokenType.tInout: + case TokenType.In: + case TokenType.Out: + case TokenType.Ref: + case TokenType.Scope: + case TokenType.Lazy: + case TokenType.Const: + case TokenType.Immutable: + case TokenType.Shared: + case TokenType.Inout: auto tmp = r[i++].value; - if (r[i] == TokenType.lParen) + if (r[i] == TokenType.LParen) v.type ~= tmp ~ parenContent(r, i); else v.attributes ~= tmp; break; - case TokenType.colon: + case TokenType.Colon: i++; - r.skipPastNext(TokenType.comma, i); + r.skipPastNext(TokenType.Comma, i); appender.put(v); v = new Variable; break; - case TokenType.comma: + case TokenType.Comma: ++i; appender.put(v); v = new Variable; @@ -660,12 +672,12 @@ body v.line = r[i].lineNumber; v.name = r[i++].value; appender.put(v); - if (i < r.length && r[i] == TokenType.vararg) + if (i < r.length && r[i] == TokenType.Vararg) { v.type ~= " ..."; } v = new Variable; - r.skipPastNext(TokenType.comma, i); + r.skipPastNext(TokenType.Comma, i); } break; } @@ -676,7 +688,7 @@ body string[] parseBaseClassList(const Token[] tokens, ref size_t index) in { - assert(tokens[index] == TokenType.colon); + assert(tokens[index] == TokenType.Colon); } body { @@ -684,11 +696,11 @@ body ++index; while (index < tokens.length) { - if (tokens[index] == 
TokenType.identifier) + if (tokens[index] == TokenType.Identifier) { string base = parseTypeDeclaration(tokens, index); appender.put(base); - if (tokens[index] == TokenType.comma) + if (tokens[index] == TokenType.Comma) ++index; else break; @@ -717,18 +729,18 @@ Struct parseStructOrUnion(const Token[] tokens, ref size_t index, string protect s.attributes = attributes; s.protection = protection; s.name = tokens[index++].value; - if (tokens[index] == TokenType.lParen) + if (tokens[index] == TokenType.LParen) s.templateParameters.insertInPlace(s.templateParameters.length, map!("a.type")(parseParameters(tokens, index))); if (index >= tokens.length) return s; - if (tokens[index] == TokenType.tIf) + if (tokens[index] == TokenType.If) s.constraint = parseConstraint(tokens, index); if (index >= tokens.length) return s; - if (tokens[index] == TokenType.lBrace) + if (tokens[index] == TokenType.LBrace) parseStructBody(tokens, index, s); else tokens.skipBlockStatement(index); @@ -739,7 +751,7 @@ Struct parseStruct(const Token[] tokens, ref size_t index, string protection, string[] attributes) in { - assert(tokens[index] == TokenType.tStruct); + assert(tokens[index] == TokenType.Struct); } body { @@ -750,7 +762,7 @@ Struct parseUnion(const Token[] tokens, ref size_t index, string protection, string[] attributes) in { - assert(tokens[index] == TokenType.tUnion); + assert(tokens[index] == TokenType.Union); } body { @@ -765,23 +777,23 @@ Inherits parseInherits(const Token[] tokens, ref size_t index, string protection i.name = tokens[index++].value; i.protection = protection; i.attributes.insertInPlace(i.attributes.length, attributes); - if (tokens[index] == TokenType.lParen) + if (tokens[index] == TokenType.LParen) i.templateParameters.insertInPlace(i.templateParameters.length, map!("a.type")(parseParameters(tokens, index))); if (index >= tokens.length) return i; - if (tokens[index] == TokenType.tIf) + if (tokens[index] == TokenType.If) i.constraint = parseConstraint(tokens, 
index); if (index >= tokens.length) return i; - if (tokens[index] == TokenType.colon) + if (tokens[index] == TokenType.Colon) i.baseClasses = parseBaseClassList(tokens, index); if (index >= tokens.length) return i; - if (tokens[index] == TokenType.lBrace) + if (tokens[index] == TokenType.LBrace) parseStructBody(tokens, index, i); else tokens.skipBlockStatement(index); @@ -792,7 +804,7 @@ Inherits parseInterface(const Token[] tokens, ref size_t index, string protectio string[] attributes) in { - assert (tokens[index] == TokenType.tInterface); + assert (tokens[index] == TokenType.Interface); } body { @@ -804,7 +816,7 @@ Inherits parseClass(const Token[] tokens, ref size_t index, string protection, string[] attributes) in { - assert(tokens[index] == TokenType.tClass); + assert(tokens[index] == TokenType.Class); } body { diff --git a/tokenizer.d b/tokenizer.d index 13394bf..fbbf110 100644 --- a/tokenizer.d +++ b/tokenizer.d @@ -29,7 +29,7 @@ import codegen; * Returns: The whitespace, or null if style was CODE_ONLY */ pure nothrow string lexWhitespace(S)(S inputString, ref size_t endIndex, - ref uint lineNumber, IterationStyle style = IterationStyle.CODE_ONLY) // I suggest to remove the last param + ref uint lineNumber) if (isSomeString!S) { immutable startIndex = endIndex; @@ -39,13 +39,7 @@ pure nothrow string lexWhitespace(S)(S inputString, ref size_t endIndex, lineNumber++; ++endIndex; } - final switch (style) - { - case IterationStyle.EVERYTHING: - return inputString[startIndex .. endIndex]; - case IterationStyle.CODE_ONLY: - return null; - } + return inputString[startIndex .. endIndex]; } /** @@ -257,7 +251,7 @@ pure nothrow Token lexNumber(S)(ref S inputString, ref size_t endIndex) endIndex++; if (isEoF(inputString, endIndex)) { - token.type = TokenType.intLiteral; + token.type = TokenType.IntLiteral; token.value = inputString[startIndex .. 
endIndex]; return token; } @@ -277,7 +271,7 @@ pure nothrow Token lexNumber(S)(ref S inputString, ref size_t endIndex) lexHex(inputString, startIndex, ++endIndex, token); return token; default: - token.type = TokenType.intLiteral; + token.type = TokenType.IntLiteral; token.value = inputString[startIndex .. endIndex]; return token; } @@ -295,7 +289,7 @@ pure nothrow void lexBinary(S)(ref S inputString, size_t startIndex, bool lexingSuffix = false; bool isLong = false; bool isUnsigned = false; - token.type = TokenType.intLiteral; + token.type = TokenType.IntLiteral; binaryLoop: while (!isEoF(inputString, endIndex)) { switch (inputString[endIndex]) @@ -315,11 +309,11 @@ pure nothrow void lexBinary(S)(ref S inputString, size_t startIndex, lexingSuffix = true; if (isLong) { - token.type = TokenType.unsignedLongLiteral; + token.type = TokenType.UnsignedLongLiteral; break binaryLoop; } else - token.type = TokenType.unsignedIntLiteral; + token.type = TokenType.UnsignedIntLiteral; isUnsigned = true; break; case 'L': @@ -329,11 +323,11 @@ pure nothrow void lexBinary(S)(ref S inputString, size_t startIndex, lexingSuffix = true; if (isUnsigned) { - token.type = TokenType.unsignedLongLiteral; + token.type = TokenType.UnsignedLongLiteral; break binaryLoop; } else - token.type = TokenType.longLiteral; + token.type = TokenType.LongLiteral; isLong = true; break; default: @@ -356,7 +350,7 @@ pure nothrow void lexDecimal(S)(ref S inputString, size_t startIndex, bool foundDot = false; bool foundE = false; bool foundPlusMinus = false; - token.type = TokenType.intLiteral; + token.type = TokenType.IntLiteral; decimalLoop: while (!isEoF(inputString, endIndex)) { switch (inputString[endIndex]) @@ -369,10 +363,30 @@ pure nothrow void lexDecimal(S)(ref S inputString, size_t startIndex, break; case 'e': case 'E': - if (foundE) + // For this to be a valid exponent, the next character must be a + // decimal character or a sign + if (foundE || isEoF(inputString, endIndex + 1)) break decimalLoop; 
+ switch (inputString[endIndex + 1]) + { + case '+': + case '-': + if (isEoF(inputString, endIndex + 2) + || inputString[endIndex + 2] < '0' + || inputString[endIndex + 2] > '9') + { + break decimalLoop; + } + break; + case '0': .. case '9': + break; + default: + break decimalLoop; + } ++endIndex; foundE = true; + isDouble = true; + token.type = TokenType.DoubleLiteral; break; case '+': case '-': @@ -388,7 +402,7 @@ pure nothrow void lexDecimal(S)(ref S inputString, size_t startIndex, break decimalLoop; // two dots with other characters between them ++endIndex; foundDot = true; - token.type = TokenType.doubleLiteral; + token.type = TokenType.DoubleLiteral; isDouble = true; break; case 'u': @@ -398,9 +412,9 @@ pure nothrow void lexDecimal(S)(ref S inputString, size_t startIndex, ++endIndex; lexingSuffix = true; if (isLong) - token.type = TokenType.unsignedLongLiteral; + token.type = TokenType.UnsignedLongLiteral; else - token.type = TokenType.unsignedIntLiteral; + token.type = TokenType.UnsignedIntLiteral; isUnsigned = true; break; case 'L': @@ -411,11 +425,11 @@ pure nothrow void lexDecimal(S)(ref S inputString, size_t startIndex, ++endIndex; lexingSuffix = true; if (isDouble) - token.type = TokenType.realLiteral; + token.type = TokenType.RealLiteral; else if (isUnsigned) - token.type = TokenType.unsignedLongLiteral; + token.type = TokenType.UnsignedLongLiteral; else - token.type = TokenType.longLiteral; + token.type = TokenType.LongLiteral; isLong = true; break; case 'f': @@ -424,40 +438,70 @@ pure nothrow void lexDecimal(S)(ref S inputString, size_t startIndex, if (isUnsigned || isLong) break decimalLoop; ++endIndex; - token.type = TokenType.floatLiteral; + token.type = TokenType.FloatLiteral; break decimalLoop; + case 'i': + ++endIndex; + // Spec says that this is the last suffix, so all cases break the + // loop. 
+ if (isDouble) + { + token.type = TokenType.Idouble; + break decimalLoop; + } + else if (isFloat) + { + token.type = TokenType.Ifloat; + break decimalLoop; + } + else if (isReal) + { + token.type = TokenType.Ireal; + break decimalLoop; + } + else + { + // There is no imaginary int + --endIndex; + break decimalLoop; + } default: break decimalLoop; } } - // suggest to extract lexing integers into a separate function - // please see unittest below - token.value = inputString[startIndex .. endIndex]; } + unittest { - dump!lexDecimal("55e-4"); // yeilds intLiteral, but should be float - dump!lexDecimal("3e+f"); // floatLiteral, but should be considered invalid - dump!lexDecimal("3e++f"); // intLiteral 3e+, but should be considered invalid - // actually, there are lots of bugs. The point is that without decomposition of integer lexing from floating-point lexing - // it is very hard to prove algorithm correctness + Token t; + size_t start, end; + lexDecimal!string("55e-4", start, end, t); + assert(t.value == "55e-4"); + assert(t.type == TokenType.DoubleLiteral); + + start = end = 0; + lexDecimal!string("123.45f", start, end, t); + assert(t.value == "123.45f"); + assert(t.type == TokenType.FloatLiteral); + + start = end = 0; + lexDecimal!string("3e+f", start, end, t); + assert(t.value == "3"); + assert(t.type == TokenType.IntLiteral); + + start = end = 0; + lexDecimal!string("3e++f", start, end, t); + assert(t.value == "3"); + assert(t.type == TokenType.IntLiteral); + + start = end = 0; + lexDecimal!string("1234..1237", start, end, t); + assert(t.value == "1234"); + assert(t.type == TokenType.IntLiteral); } -// Temporary function to illustrate some problems -// Executes T and dumps results to console -void dump(alias T)(string s) { - size_t start; - size_t end; - Token tok; - T!(string)(s, start, end, tok); - // dump results - writeln(tok.type); - writeln(tok.value); - writeln(start); - writeln(end); -} nothrow void lexHex(S)(ref S inputString, ref size_t startIndex, ref 
size_t endIndex, ref Token token) if (isSomeString!S) @@ -471,7 +515,7 @@ nothrow void lexHex(S)(ref S inputString, ref size_t startIndex, bool foundDot = false; bool foundE = false; bool foundPlusMinus = false; - token.type = TokenType.intLiteral; + token.type = TokenType.IntLiteral; hexLoop: while (!isEoF(inputString, endIndex)) { switch (inputString[endIndex]) @@ -505,7 +549,7 @@ nothrow void lexHex(S)(ref S inputString, ref size_t startIndex, break hexLoop; // two dots with other characters between them ++endIndex; foundDot = true; - token.type = TokenType.doubleLiteral; + token.type = TokenType.DoubleLiteral; isDouble = true; break; default: @@ -566,7 +610,7 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl Token currentToken; currentToken.lineNumber = lineNumber; // lineNumber is always 1 currentToken.value = lexScriptLine(inputString, endIndex, lineNumber); - currentToken.type = TokenType.scriptLine; + currentToken.type = TokenType.ScriptLine; } while (!isEoF(inputString, endIndex)) @@ -580,8 +624,8 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl { currentToken.lineNumber = lineNumber; currentToken.value = lexWhitespace(inputString, endIndex, - lineNumber, IterationStyle.EVERYTHING); // note: I suggest to remove the last parameter to simplify lexWhitespace - currentToken.type = TokenType.whitespace; + lineNumber); + currentToken.type = TokenType.Whitespace; tokenAppender.put(currentToken); } else @@ -593,66 +637,66 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl outerSwitch: switch(inputString[endIndex]) { mixin(generateCaseTrie( - "=", "TokenType.assign", - "&", "TokenType.bitAnd", - "&=", "TokenType.bitAndEquals", - "|", "TokenType.bitOr", - "|=", "TokenType.bitOrEquals", - "~=", "TokenType.catEquals", - ":", "TokenType.colon", - ",", "TokenType.comma", - "$", "TokenType.dollar", - ".", "TokenType.dot", - "==", "TokenType.equals", - "=>", 
"TokenType.goesTo", - ">", "TokenType.greater", - ">=", "TokenType.greaterEqual", - "#", "TokenType.hash", - "&&", "TokenType.logicAnd", - "{", "TokenType.lBrace", - "[", "TokenType.lBracket", - "<", "TokenType.less", - "<=", "TokenType.lessEqual", - "<>=", "TokenType.lessEqualGreater", - "<>", "TokenType.lessOrGreater", - "||", "TokenType.logicOr", - "(", "TokenType.lParen", - "-", "TokenType.minus", - "-=", "TokenType.minusEquals", - "%", "TokenType.mod", - "%=", "TokenType.modEquals", - "*=", "TokenType.mulEquals", - "!", "TokenType.not", - "!=", "TokenType.notEquals", - "!>", "TokenType.notGreater", - "!>=", "TokenType.notGreaterEqual", - "!<", "TokenType.notLess", - "!<=", "TokenType.notLessEqual", - "!<>", "TokenType.notLessEqualGreater", - "+", "TokenType.plus", - "+=", "TokenType.plusEquals", - "^^", "TokenType.pow", - "^^=", "TokenType.powEquals", - "}", "TokenType.rBrace", - "]", "TokenType.rBracket", - ")", "TokenType.rParen", - ";", "TokenType.semicolon", - "<<", "TokenType.shiftLeft", - "<<=", "TokenType.shiftLeftEqual", - ">>", "TokenType.shiftRight", - ">>=", "TokenType.shiftRightEqual", - "..", "TokenType.slice", - "*", "TokenType.star", - "?", "TokenType.ternary", - "~", "TokenType.tilde", - "--", "TokenType.uMinus", - "!<>=", "TokenType.unordered", - ">>>", "TokenType.unsignedShiftRight", - ">>>=", "TokenType.unsignedShiftRightEqual", - "++", "TokenType.uPlus", - "...", "TokenType.vararg", - "^", "TokenType.xor", - "^=", "TokenType.xorEquals", + "=", "TokenType.Assign", + "&", "TokenType.BitAnd", + "&=", "TokenType.BitAndEquals", + "|", "TokenType.BitOr", + "|=", "TokenType.BitOrEquals", + "~=", "TokenType.CatEquals", + ":", "TokenType.Colon", + ",", "TokenType.Comma", + "$", "TokenType.Dollar", + ".", "TokenType.Dot", + "==", "TokenType.Equals", + "=>", "TokenType.GoesTo", + ">", "TokenType.Greater", + ">=", "TokenType.GreaterEqual", + "#", "TokenType.Hash", + "&&", "TokenType.LogicAnd", + "{", "TokenType.LBrace", + "[", "TokenType.LBracket", + 
"<", "TokenType.Less", + "<=", "TokenType.LessEqual", + "<>=", "TokenType.LessEqualGreater", + "<>", "TokenType.LessOrGreater", + "||", "TokenType.LogicOr", + "(", "TokenType.LParen", + "-", "TokenType.Minus", + "-=", "TokenType.MinusEquals", + "%", "TokenType.Mod", + "%=", "TokenType.ModEquals", + "*=", "TokenType.MulEquals", + "!", "TokenType.Not", + "!=", "TokenType.NotEquals", + "!>", "TokenType.NotGreater", + "!>=", "TokenType.NotGreaterEqual", + "!<", "TokenType.NotLess", + "!<=", "TokenType.NotLessEqual", + "!<>", "TokenType.NotLessEqualGreater", + "+", "TokenType.Plus", + "+=", "TokenType.PlusEquals", + "^^", "TokenType.Pow", + "^^=", "TokenType.PowEquals", + "}", "TokenType.RBrace", + "]", "TokenType.RBracket", + ")", "TokenType.RParen", + ";", "TokenType.Semicolon", + "<<", "TokenType.ShiftLeft", + "<<=", "TokenType.ShiftLeftEqual", + ">>", "TokenType.ShiftRight", + ">>=", "TokenType.ShiftRightEqual", + "..", "TokenType.Slice", + "*", "TokenType.Star", + "?", "TokenType.Ternary", + "~", "TokenType.Tilde", + "--", "TokenType.Decrement", + "!<>=", "TokenType.Unordered", + ">>>", "TokenType.UnsignedShiftRight", + ">>>=", "TokenType.UnsignedShiftRightEqual", + "++", "TokenType.Increment", + "...", "TokenType.Vararg", + "^", "TokenType.Xor", + "^=", "TokenType.XorEquals", )); case '0': .. 
case '9': currentToken = lexNumber(inputString, endIndex); @@ -662,7 +706,7 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl if (isEoF(inputString, endIndex)) { currentToken.value = "/"; - currentToken.type = TokenType.div; + currentToken.type = TokenType.Div; currentToken.lineNumber = lineNumber; break; } @@ -680,17 +724,17 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl else { currentToken.value = lexComment(inputString, endIndex, lineNumber); - currentToken.type = TokenType.comment; + currentToken.type = TokenType.Comment; break; } case '=': currentToken.value = "/="; - currentToken.type = TokenType.divEquals; + currentToken.type = TokenType.DivEquals; ++endIndex; break; default: currentToken.value = "/"; - currentToken.type = TokenType.div; + currentToken.type = TokenType.Div; break; } break; @@ -701,13 +745,13 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl currentToken.lineNumber = lineNumber; currentToken.value = lexString(inputString, endIndex, lineNumber, inputString[endIndex], false); - currentToken.type = TokenType.stringLiteral; + currentToken.type = TokenType.StringLiteral; break; case '`': currentToken.lineNumber = lineNumber; currentToken.value = lexString(inputString, endIndex, lineNumber, inputString[endIndex], false); - currentToken.type = TokenType.stringLiteral; + currentToken.type = TokenType.StringLiteral; break; case 'x': ++endIndex; @@ -720,7 +764,7 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl currentToken.lineNumber = lineNumber; currentToken.value = lexString(inputString, endIndex, lineNumber, inputString[endIndex]); - currentToken.type = TokenType.stringLiteral; + currentToken.type = TokenType.StringLiteral; break; case 'q': currentToken.value = "q"; @@ -733,13 +777,13 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl currentToken.lineNumber = lineNumber; 
currentToken.value ~= lexDelimitedString(inputString, endIndex, lineNumber); - currentToken.type = TokenType.stringLiteral; + currentToken.type = TokenType.StringLiteral; break outerSwitch; case '{': currentToken.lineNumber = lineNumber; currentToken.value ~= lexTokenString(inputString, endIndex, lineNumber); - currentToken.type = TokenType.stringLiteral; + currentToken.type = TokenType.StringLiteral; break outerSwitch; default: break;