From 61206bc0ca3879d3e4b7d349c61c71d387e5179e Mon Sep 17 00:00:00 2001
From: Prajwal S N
Date: Fri, 22 Sep 2023 13:17:29 +0530
Subject: [PATCH] refactor: lexer in `tokens.d`

Signed-off-by: Prajwal S N
---
Review notes (this section is ignored when the patch is applied):
- isBreakToken: `^=` is TOK.xorAssign, not TOK.powAssign; this now agrees
  with the list that was removed and with breakCost.
- breakCost: TOK.notEqual (`!=`) is back in the 200 group; isBreakToken
  lists it, so without it the unittest assertion would fail.
- unittest: the ubyte loop variable is cast to TOK explicitly.
- tokenLength: documented, with an open TODO about measuring token text.
- Hunk headers and the diffstat are recomputed; the blob ids on the
  `index` line are unchanged from the original patch.

 src/dfmt/tokens.d | 380 +++++++++++++++++++++++++--------------------
 1 file changed, 207 insertions(+), 173 deletions(-)

diff --git a/src/dfmt/tokens.d b/src/dfmt/tokens.d
index 0271fde..9d5c9b1 100644
--- a/src/dfmt/tokens.d
+++ b/src/dfmt/tokens.d
@@ -5,15 +5,15 @@
 
 module dfmt.tokens;
 
-import dparse.lexer;
+import dmd.tokens;
 
 /// Length of an invalid token
 enum int INVALID_TOKEN_LENGTH = -1;
 
-uint betweenParenLength(const Token[] tokens) pure @safe @nogc
+uint betweenParenLength(const Token[] tokens) @safe
 in
 {
-    assert(tokens[0].type == tok!"(");
+    assert(tokens[0].value == TOK.leftParenthesis);
 }
 do
 {
@@ -22,9 +22,9 @@ do
     int depth = 1;
     while (i < tokens.length && depth > 0)
     {
-        if (tokens[i].type == tok!"(")
+        if (tokens[i].value == TOK.leftParenthesis)
             depth++;
-        else if (tokens[i].type == tok!")")
+        else if (tokens[i].value == TOK.rightParenthesis)
             depth--;
         length += tokenLength(tokens[i]);
         i++;
@@ -32,163 +32,238 @@ do
     return length;
 }
 
-int tokenLength(ref const Token t) pure @safe @nogc
+/**
+ * Computes the length contributed by a single token.
+ *
+ * Keywords, numeric/character literals and identifiers yield the length of
+ * `Token.toString(t.value)`; spaced operators yield that length plus one;
+ * string tokens yield it up to the first `'\n'`, if there is one.
+ *
+ * NOTE(review): `Token.toString(t.value)` is given only the token kind, so
+ * the result cannot depend on the text of an identifier, literal or string.
+ * TODO: confirm how the source text of a `dmd.tokens` `Token` should be
+ * obtained and measure that instead.
+ *
+ * Returns: the length, or `INVALID_TOKEN_LENGTH` for any other token kind.
+ */
+int tokenLength(ref const Token t) @safe
 {
     import std.algorithm : countUntil;
 
+    if (t.isKeyword())
+        return cast(int) Token.toString(t.value).length;
+
     int c;
-    switch (t.type)
+    switch (t.value)
     {
-    case tok!"doubleLiteral":
-    case tok!"floatLiteral":
-    case tok!"idoubleLiteral":
-    case tok!"ifloatLiteral":
-    case tok!"intLiteral":
-    case tok!"longLiteral":
-    case tok!"realLiteral":
-    case tok!"irealLiteral":
-    case tok!"uintLiteral":
-    case tok!"ulongLiteral":
-    case tok!"characterLiteral":
-        return cast(int) t.text.length;
-    case tok!"identifier":
-    case tok!"stringLiteral":
-    case tok!"wstringLiteral":
-    case tok!"dstringLiteral":
+    // Numeric literals
+    case TOK.int32Literal:
+    case TOK.uns32Literal:
+    case TOK.int64Literal:
+    case TOK.uns64Literal:
+    case TOK.int128Literal:
+    case TOK.uns128Literal:
+    case TOK.float32Literal:
+    case TOK.float64Literal:
+    case TOK.float80Literal:
+    case TOK.imaginary32Literal:
+    case TOK.imaginary64Literal:
+    case TOK.imaginary80Literal:
+    // Char constants
+    case TOK.charLiteral:
+    case TOK.wcharLiteral:
+    case TOK.dcharLiteral:
+    // Identifiers
+    case TOK.identifier:
+        return cast(int) Token.toString(t.value).length;
+    // Spaced operators
+    case TOK.add:
+    case TOK.addAssign:
+    case TOK.and:
+    case TOK.andAnd:
+    case TOK.andAssign:
+    case TOK.arrow:
+    case TOK.assign:
+    case TOK.colon:
+    case TOK.colonColon:
+    case TOK.comma:
+    case TOK.concatenateAssign:
+    case TOK.div:
+    case TOK.divAssign:
+    case TOK.dot:
+    case TOK.dotDotDot:
+    case TOK.equal:
+    case TOK.goesTo:
+    case TOK.greaterOrEqual:
+    case TOK.greaterThan:
+    case TOK.identity:
+    case TOK.is_:
+    case TOK.leftShift:
+    case TOK.leftShiftAssign:
+    case TOK.lessOrEqual:
+    case TOK.lessThan:
+    case TOK.min:
+    case TOK.minAssign:
+    case TOK.minusMinus:
+    case TOK.mod:
+    case TOK.modAssign:
+    case TOK.mul:
+    case TOK.mulAssign:
+    case TOK.not:
+    case TOK.notEqual:
+    case TOK.notIdentity:
+    case TOK.or:
+    case TOK.orAssign:
+    case TOK.orOr:
+    case TOK.plusPlus:
+    case TOK.pound:
+    case TOK.pow:
+    case TOK.powAssign:
+    case TOK.question:
+    case TOK.rightShift:
+    case TOK.rightShiftAssign:
+    case TOK.semicolon:
+    case TOK.slice:
+    case TOK.tilde:
+    case TOK.unsignedRightShift:
+    case TOK.unsignedRightShiftAssign:
+    case TOK.xor:
+    case TOK.xorAssign:
+        return cast(int) Token.toString(t.value).length + 1;
+    case TOK.string_:
         // TODO: Unicode line breaks and old-Mac line endings
-        c = cast(int) t.text.countUntil('\n');
+        c = cast(int) Token.toString(t.value).countUntil('\n');
         if (c == -1)
-            return cast(int) t.text.length;
+            return cast(int) Token.toString(t.value).length;
         else
             return c;
-        mixin(generateFixedLengthCases());
+
     default:
         return INVALID_TOKEN_LENGTH;
     }
 }
 
-bool isBreakToken(IdType t) pure nothrow @safe @nogc
+/**
+ * Returns: `true` if `t` is one of the token kinds listed below, otherwise
+ * `false`. Each kind listed here also needs a cost other than 1000 in
+ * `breakCost`; the unittest in this module asserts that.
+ */
+bool isBreakToken(TOK t) pure nothrow @safe @nogc
 {
     switch (t)
     {
-    case tok!"||":
-    case tok!"&&":
-    case tok!"(":
-    case tok!"[":
-    case tok!",":
-    case tok!":":
-    case tok!";":
-    case tok!"^^":
-    case tok!"^=":
-    case tok!"^":
-    case tok!"~=":
-    case tok!"<<=":
-    case tok!"<<":
-    case tok!"<=":
-    case tok!"<>=":
-    case tok!"<>":
-    case tok!"<":
-    case tok!"==":
-    case tok!"=>":
-    case tok!"=":
-    case tok!">=":
-    case tok!">>=":
-    case tok!">>>=":
-    case tok!">>>":
-    case tok!">>":
-    case tok!">":
-    case tok!"|=":
-    case tok!"|":
-    case tok!"-=":
-    case tok!"!<=":
-    case tok!"!<>=":
-    case tok!"!<>":
-    case tok!"!<":
-    case tok!"!=":
-    case tok!"!>=":
-    case tok!"!>":
-    case tok!"?":
-    case tok!"/=":
-    case tok!"/":
-    case tok!"..":
-    case tok!"*=":
-    case tok!"*":
-    case tok!"&=":
-    case tok!"%=":
-    case tok!"%":
-    case tok!"+=":
-    case tok!".":
-    case tok!"~":
-    case tok!"+":
-    case tok!"-":
+    case TOK.orOr:
+    case TOK.andAnd:
+    case TOK.leftParenthesis:
+    case TOK.leftBracket:
+    case TOK.comma:
+    case TOK.colon:
+    case TOK.semicolon:
+    case TOK.pow:
+    case TOK.xorAssign:
+    case TOK.xor:
+    case TOK.concatenateAssign:
+    case TOK.leftShiftAssign:
+    case TOK.leftShift:
+    case TOK.lessOrEqual:
+    case TOK.lessThan:
+    case TOK.equal:
+    case TOK.goesTo:
+    case TOK.assign:
+    case TOK.greaterOrEqual:
+    case TOK.rightShiftAssign:
+    case TOK.unsignedRightShift:
+    case TOK.unsignedRightShiftAssign:
+    case TOK.rightShift:
+    case TOK.greaterThan:
+    case TOK.orAssign:
+    case TOK.or:
+    case TOK.minAssign:
+    case TOK.notEqual:
+    case TOK.question:
+    case TOK.divAssign:
+    case TOK.div:
+    case TOK.slice:
+    case TOK.mulAssign:
+    case TOK.mul:
+    case TOK.andAssign:
+    case TOK.modAssign:
+    case TOK.mod:
+    case TOK.addAssign:
+    case TOK.dot:
+    case TOK.tilde:
+    case TOK.add:
+    case TOK.min:
         return true;
     default:
         return false;
     }
 }
 
-int breakCost(IdType p, IdType c) pure nothrow @safe @nogc
+/**
+ * Looks up the break cost associated with token kind `c`.
+ *
+ * Params:
+ *     p = second token kind, consulted only when `c` is `TOK.colon` or
+ *         `TOK.dot` (the `colon` comment treats it as what `c` comes after)
+ *     c = token kind whose cost is looked up
+ *
+ * Returns: 0, 60, 200 or 300 for the kinds handled below, 1000 otherwise.
+ */
+int breakCost(TOK p, TOK c) pure nothrow @safe @nogc
 {
     switch (c)
     {
-    case tok!"||":
-    case tok!"&&":
-    case tok!",":
-    case tok!"?":
+    case TOK.orOr:
+    case TOK.andAnd:
+    case TOK.comma:
+    case TOK.question:
         return 0;
-    case tok!"(":
+    case TOK.leftParenthesis:
         return 60;
-    case tok!"[":
+    case TOK.leftBracket:
         return 300;
-    case tok!";":
-    case tok!"^^":
-    case tok!"^=":
-    case tok!"^":
-    case tok!"~=":
-    case tok!"<<=":
-    case tok!"<<":
-    case tok!"<=":
-    case tok!"<>=":
-    case tok!"<>":
-    case tok!"<":
-    case tok!"==":
-    case tok!"=>":
-    case tok!"=":
-    case tok!">=":
-    case tok!">>=":
-    case tok!">>>=":
-    case tok!">>>":
-    case tok!">>":
-    case tok!">":
-    case tok!"|=":
-    case tok!"|":
-    case tok!"-=":
-    case tok!"!<=":
-    case tok!"!<>=":
-    case tok!"!<>":
-    case tok!"!<":
-    case tok!"!=":
-    case tok!"!>=":
-    case tok!"!>":
-    case tok!"/=":
-    case tok!"/":
-    case tok!"..":
-    case tok!"*=":
-    case tok!"*":
-    case tok!"&=":
-    case tok!"%=":
-    case tok!"%":
-    case tok!"+":
-    case tok!"-":
-    case tok!"~":
-    case tok!"+=":
+    case TOK.semicolon:
+    case TOK.pow:
+    case TOK.xorAssign:
+    case TOK.xor:
+    case TOK.concatenateAssign:
+    case TOK.leftShiftAssign:
+    case TOK.leftShift:
+    case TOK.lessOrEqual:
+    case TOK.lessThan:
+    case TOK.equal:
+    case TOK.goesTo:
+    case TOK.assign:
+    case TOK.greaterOrEqual:
+    case TOK.rightShiftAssign:
+    case TOK.unsignedRightShiftAssign:
+    case TOK.unsignedRightShift:
+    case TOK.rightShift:
+    case TOK.greaterThan:
+    case TOK.orAssign:
+    case TOK.or:
+    case TOK.minAssign:
+    case TOK.notEqual:
+    case TOK.divAssign:
+    case TOK.div:
+    case TOK.slice:
+    case TOK.mulAssign:
+    case TOK.mul:
+    case TOK.andAssign:
+    case TOK.modAssign:
+    case TOK.mod:
+    case TOK.add:
+    case TOK.min:
+    case TOK.tilde:
+    case TOK.addAssign:
         return 200;
-    case tok!":":
+    case TOK.colon:
         // colon could be after a label or an import, where it should normally wrap like before
         // for everything else (associative arrays) try not breaking around colons
-        return p == tok!"identifier" ? 0 : 300;
-    case tok!".":
-        return p == tok!")" ? 0 : 300;
+        return p == TOK.identifier ? 0 : 300;
+    case TOK.dot:
+        return p == TOK.rightParenthesis ? 0 : 300;
     default:
         return 1000;
     }
@@ -198,46 +273,5 @@ pure nothrow @safe @nogc unittest
 {
     foreach (ubyte u; 0 .. ubyte.max)
-        if (isBreakToken(u))
-            assert(breakCost(tok!".", u) != 1000);
-}
-
-private string generateFixedLengthCases()
-{
-    import std.algorithm : map;
-    import std.string : format;
-    import std.array : join;
-
-    assert(__ctfe);
-
-    string[] spacedOperatorTokens = [
-        ",", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=",
-        "!>", "!>=", "%", "%=", "&", "&&", "&=", "*", "*=", "+", "+=", "-",
-        "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=", "==", "=>",
-        ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "^", "^=", "^^",
-        "^^=", "|", "|=", "||", "~", "~="
-    ];
-    immutable spacedOperatorTokenCases = spacedOperatorTokens.map!(
-        a => format(`case tok!"%s": return %d + 1;`, a, a.length)).join("\n\t");
-
-    string[] identifierTokens = [
-        "abstract", "alias", "align", "asm", "assert", "auto", "bool",
-        "break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class",
-        "const", "continue", "creal", "dchar", "debug", "default", "delegate", "delete", "deprecated",
-        "do", "double", "else", "enum", "export", "extern", "false", "final", "finally", "float",
-        "for", "foreach", "foreach_reverse", "function", "goto", "idouble", "if", "ifloat", "immutable",
-        "import", "in", "inout", "int", "interface", "invariant", "ireal", "is",
-        "lazy", "long", "macro", "mixin", "module", "new", "nothrow", "null", "out", "override",
-        "package", "pragma", "private", "protected", "public", "pure", "real", "ref", "return", "scope",
-        "shared", "short", "static", "struct", "super", "switch", "synchronized", "template", "this",
-        "throw", "true", "try", "typedef", "typeid", "typeof", "ubyte", "ucent", "uint", "ulong",
-        "union", "unittest", "ushort", "version", "void", "wchar",
-        "while", "with", "__DATE__", "__EOF__", "__FILE__",
-        "__FUNCTION__", "__gshared", "__LINE__", "__MODULE__", "__parameters",
-        "__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__",
-        "__traits", "__vector", "__VENDOR__", "__VERSION__", "$", "++", "--",
-        ".", "[", "]", "(", ")", "{", "}"
-    ];
-    immutable identifierTokenCases = identifierTokens.map!(
-        a => format(`case tok!"%s": return %d;`, a, a.length)).join("\n\t");
-    return spacedOperatorTokenCases ~ identifierTokenCases;
+        if (isBreakToken(cast(TOK) u))
+            assert(breakCost(TOK.dot, cast(TOK) u) != 1000);
 }