refactor: lexer in `tokens.d`

Signed-off-by: Prajwal S N <prajwalnadig21@gmail.com>
This commit is contained in:
Prajwal S N 2023-09-22 13:17:29 +05:30
parent 34174badd0
commit 61206bc0ca
No known key found for this signature in database
GPG Key ID: 60701A603988FAC2
1 changed file with 176 additions and 172 deletions

View File

@@ -5,15 +5,15 @@
module dfmt.tokens; module dfmt.tokens;
import dparse.lexer; import dmd.tokens;
/// Length of an invalid token /// Length of an invalid token
enum int INVALID_TOKEN_LENGTH = -1; enum int INVALID_TOKEN_LENGTH = -1;
uint betweenParenLength(const Token[] tokens) pure @safe @nogc uint betweenParenLength(const Token[] tokens) @safe
in in
{ {
assert(tokens[0].type == tok!"("); assert(tokens[0].value == TOK.leftParenthesis);
} }
do do
{ {
@@ -22,9 +22,9 @@ do
int depth = 1; int depth = 1;
while (i < tokens.length && depth > 0) while (i < tokens.length && depth > 0)
{ {
if (tokens[i].type == tok!"(") if (tokens[i].value == TOK.leftParenthesis)
depth++; depth++;
else if (tokens[i].type == tok!")") else if (tokens[i].value == TOK.rightParenthesis)
depth--; depth--;
length += tokenLength(tokens[i]); length += tokenLength(tokens[i]);
i++; i++;
@@ -32,163 +32,208 @@ do
return length; return length;
} }
/**
 * Computes the width that token `t` contributes to a line.
 *
 * Keywords and fixed-spelling punctuation are measured from the spelling
 * table behind `Token.toString(TOK)`. Operators in the "spaced" group are
 * charged one extra column. Identifiers and literals are measured from the
 * text of the token itself, and string literals only up to their first
 * line feed.
 *
 * Params:
 *     t = the token to measure
 *
 * Returns: the width of the token, or `INVALID_TOKEN_LENGTH` for token kinds
 *     that are not handled here.
 */
int tokenLength(ref const Token t) @safe
{
    import std.algorithm : countUntil;

    // Text of this particular token. `Token.toString(TOK)` only yields the
    // name of the token *kind* ("identifier", ...), which is the same for
    // every identifier or literal and therefore useless as a width.
    // NOTE(review): `toChars()` appears to rebuild literals from their value,
    // so the result may differ from the original source spelling - confirm.
    static const(char)[] spelling(ref const Token token) @trusted
    {
        import std.string : fromStringz;

        return fromStringz(token.toChars());
    }

    // Contents of a string literal as stored by the lexer.
    // NOTE(review): presumably excludes the delimiters and any postfix, so
    // the reported width is that of the contents only - confirm.
    static const(char)[] stringContents(ref const Token token) @trusted
    {
        return token.ustring[0 .. token.len];
    }

    if (t.isKeyword())
        return cast(int) Token.toString(t.value).length;

    switch (t.value)
    {
        // Numeric literals
    case TOK.int32Literal:
    case TOK.uns32Literal:
    case TOK.int64Literal:
    case TOK.uns64Literal:
    case TOK.int128Literal:
    case TOK.uns128Literal:
    case TOK.float32Literal:
    case TOK.float64Literal:
    case TOK.float80Literal:
    case TOK.imaginary32Literal:
    case TOK.imaginary64Literal:
    case TOK.imaginary80Literal:
        // Char constants
    case TOK.charLiteral:
    case TOK.wcharLiteral:
    case TOK.dcharLiteral:
        // Identifiers
    case TOK.identifier:
        return cast(int) spelling(t).length;

        // Fixed-spelling tokens that are not charged an extra column. The
        // table this switch replaces gave all of them their plain length;
        // without these cases brackets would yield INVALID_TOKEN_LENGTH.
    case TOK.dollar:
    case TOK.dot:
    case TOK.leftBracket:
    case TOK.leftCurly:
    case TOK.leftParenthesis:
    case TOK.minusMinus:
    case TOK.plusPlus:
    case TOK.rightBracket:
    case TOK.rightCurly:
    case TOK.rightParenthesis:
        return cast(int) Token.toString(t.value).length;

        // Spaced operators: spelling plus one extra column
    case TOK.add:
    case TOK.addAssign:
    case TOK.and:
    case TOK.andAnd:
    case TOK.andAssign:
    case TOK.arrow:
    case TOK.assign:
    case TOK.at:
    case TOK.colon:
    case TOK.colonColon:
    case TOK.comma:
    case TOK.concatenateAssign:
    case TOK.div:
    case TOK.divAssign:
    case TOK.dotDotDot:
    case TOK.equal:
    case TOK.goesTo:
    case TOK.greaterOrEqual:
    case TOK.greaterThan:
    case TOK.identity:
    case TOK.is_:
    case TOK.leftShift:
    case TOK.leftShiftAssign:
    case TOK.lessOrEqual:
    case TOK.lessThan:
    case TOK.min:
    case TOK.minAssign:
    case TOK.mod:
    case TOK.modAssign:
    case TOK.mul:
    case TOK.mulAssign:
    case TOK.not:
    case TOK.notEqual:
    case TOK.notIdentity:
    case TOK.or:
    case TOK.orAssign:
    case TOK.orOr:
    case TOK.pound:
    case TOK.pow:
    case TOK.powAssign:
    case TOK.question:
    case TOK.rightShift:
    case TOK.rightShiftAssign:
    case TOK.semicolon:
    case TOK.slice:
    case TOK.tilde:
    case TOK.unsignedRightShift:
    case TOK.unsignedRightShiftAssign:
    case TOK.xor:
    case TOK.xorAssign:
        return cast(int) Token.toString(t.value).length + 1;

    case TOK.string_:
        // Only the first line of a multi-line literal counts.
        // TODO: Unicode line breaks and old-Mac line endings
        const contents = stringContents(t);
        const firstLineFeed = cast(int) contents.countUntil('\n');
        return firstLineFeed == -1 ? cast(int) contents.length : firstLineFeed;

    default:
        return INVALID_TOKEN_LENGTH;
    }
}
/**
 * Tells whether `t` belongs to the set of token kinds treated as break
 * tokens.
 *
 * Every kind accepted here is expected to have a dedicated entry in
 * `breakCost`: the module's unittest asserts that `breakCost` never returns
 * its fallback value of 1000 for a break token.
 *
 * Params:
 *     t = the token kind to classify
 *
 * Returns: `true` if `t` is a break token, `false` otherwise.
 */
bool isBreakToken(TOK t) pure nothrow @safe @nogc
{
    switch (t)
    {
        // Logical operators, opening brackets and separators
    case TOK.orOr:
    case TOK.andAnd:
    case TOK.leftParenthesis:
    case TOK.leftBracket:
    case TOK.comma:
    case TOK.colon:
    case TOK.semicolon:
    case TOK.question:
    case TOK.dot:
    case TOK.slice:
        // Assignment operators. `^=` is xorAssign; powAssign (`^^=`) was never
        // part of this set and has no entry in breakCost.
    case TOK.assign:
    case TOK.addAssign:
    case TOK.minAssign:
    case TOK.mulAssign:
    case TOK.divAssign:
    case TOK.modAssign:
    case TOK.andAssign:
    case TOK.orAssign:
    case TOK.xorAssign:
    case TOK.concatenateAssign:
    case TOK.leftShiftAssign:
    case TOK.rightShiftAssign:
    case TOK.unsignedRightShiftAssign:
        // Comparison operators and `=>`
    case TOK.equal:
    case TOK.notEqual:
    case TOK.lessThan:
    case TOK.lessOrEqual:
    case TOK.greaterThan:
    case TOK.greaterOrEqual:
    case TOK.goesTo:
        // Arithmetic, bitwise and shift operators
    case TOK.add:
    case TOK.min:
    case TOK.mul:
    case TOK.div:
    case TOK.mod:
    case TOK.pow:
    case TOK.xor:
    case TOK.or:
    case TOK.tilde:
    case TOK.leftShift:
    case TOK.rightShift:
    case TOK.unsignedRightShift:
        return true;
    default:
        return false;
    }
}
/**
 * Returns the cost associated with token kind `c`, given the kind `p` of the
 * token before it. The cost is used for line wrapping decisions.
 *
 * Params:
 *     p = kind of the previous token; only consulted when `c` is a colon or
 *         a dot
 *     c = kind of the current token
 *
 * Returns: 0 for `||`, `&&`, `,` and `?`; 60 for `(`; 300 for `[`; 200 for
 *     `;` and the listed operators; 0 or 300 for `:` and `.` depending on
 *     `p`; 1000 for every other kind.
 */
int breakCost(TOK p, TOK c) pure nothrow @safe @nogc
{
    switch (c)
    {
    case TOK.orOr:
    case TOK.andAnd:
    case TOK.comma:
    case TOK.question:
        return 0;
    case TOK.leftParenthesis:
        return 60;
    case TOK.leftBracket:
        return 300;
    case TOK.semicolon:
    case TOK.pow:
    case TOK.xorAssign:
    case TOK.xor:
    case TOK.concatenateAssign:
    case TOK.leftShiftAssign:
    case TOK.leftShift:
    case TOK.lessOrEqual:
    case TOK.lessThan:
    case TOK.equal:
    case TOK.goesTo:
    case TOK.assign:
    case TOK.greaterOrEqual:
    case TOK.rightShiftAssign:
    case TOK.unsignedRightShiftAssign:
    case TOK.unsignedRightShift:
    case TOK.rightShift:
    case TOK.greaterThan:
    case TOK.orAssign:
    case TOK.or:
    case TOK.minAssign:
        // `!=` is a break token in isBreakToken, so it needs an entry here;
        // otherwise it would fall through to the 1000 default.
    case TOK.notEqual:
    case TOK.divAssign:
    case TOK.div:
    case TOK.slice:
    case TOK.mulAssign:
    case TOK.mul:
    case TOK.andAssign:
    case TOK.modAssign:
    case TOK.mod:
    case TOK.add:
    case TOK.min:
    case TOK.tilde:
    case TOK.addAssign:
        return 200;
    case TOK.colon:
        // colon could be after a label or an import, where it should normally wrap like before
        // for everything else (associative arrays) try not breaking around colons
        return p == TOK.identifier ? 0 : 300;
    case TOK.dot:
        // a dot that directly follows a closing parenthesis costs nothing
        return p == TOK.rightParenthesis ? 0 : 300;
    default:
        return 1000;
    }
}
@@ -198,46 +243,5 @@ pure nothrow @safe @nogc unittest
{ {
foreach (ubyte u; 0 .. ubyte.max) foreach (ubyte u; 0 .. ubyte.max)
if (isBreakToken(u)) if (isBreakToken(u))
assert(breakCost(tok!".", u) != 1000); assert(breakCost(TOK.dot, u) != 1000);
}
private string generateFixedLengthCases()
{
import std.algorithm : map;
import std.string : format;
import std.array : join;
assert(__ctfe);
string[] spacedOperatorTokens = [
",", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=",
"!>", "!>=", "%", "%=", "&", "&&", "&=", "*", "*=", "+", "+=", "-",
"-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=", "==", "=>",
">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "^", "^=", "^^",
"^^=", "|", "|=", "||", "~", "~="
];
immutable spacedOperatorTokenCases = spacedOperatorTokens.map!(
a => format(`case tok!"%s": return %d + 1;`, a, a.length)).join("\n\t");
string[] identifierTokens = [
"abstract", "alias", "align", "asm", "assert", "auto", "bool",
"break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class",
"const", "continue", "creal", "dchar", "debug", "default", "delegate", "delete", "deprecated",
"do", "double", "else", "enum", "export", "extern", "false", "final", "finally", "float",
"for", "foreach", "foreach_reverse", "function", "goto", "idouble", "if", "ifloat", "immutable",
"import", "in", "inout", "int", "interface", "invariant", "ireal", "is",
"lazy", "long", "macro", "mixin", "module", "new", "nothrow", "null", "out", "override",
"package", "pragma", "private", "protected", "public", "pure", "real", "ref", "return", "scope",
"shared", "short", "static", "struct", "super", "switch", "synchronized", "template", "this",
"throw", "true", "try", "typedef", "typeid", "typeof", "ubyte", "ucent", "uint", "ulong",
"union", "unittest", "ushort", "version", "void", "wchar",
"while", "with", "__DATE__", "__EOF__", "__FILE__",
"__FUNCTION__", "__gshared", "__LINE__", "__MODULE__", "__parameters",
"__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__",
"__traits", "__vector", "__VENDOR__", "__VERSION__", "$", "++", "--",
".", "[", "]", "(", ")", "{", "}"
];
immutable identifierTokenCases = identifierTokens.map!(
a => format(`case tok!"%s": return %d;`, a, a.length)).join("\n\t");
return spacedOperatorTokenCases ~ identifierTokenCases;
} }