refactor: lexer in `tokens.d`

Signed-off-by: Prajwal S N <prajwalnadig21@gmail.com>
This commit is contained in:
Prajwal S N 2023-09-22 13:17:29 +05:30
parent 34174badd0
commit 61206bc0ca
No known key found for this signature in database
GPG Key ID: 60701A603988FAC2
1 changed file with 176 additions and 172 deletions

View File

@ -5,15 +5,15 @@
module dfmt.tokens;
import dparse.lexer;
import dmd.tokens;
/// Length of an invalid token
enum int INVALID_TOKEN_LENGTH = -1;
uint betweenParenLength(const Token[] tokens) pure @safe @nogc
uint betweenParenLength(const Token[] tokens) @safe
in
{
assert(tokens[0].type == tok!"(");
assert(tokens[0].value == TOK.leftParenthesis);
}
do
{
@ -22,9 +22,9 @@ do
int depth = 1;
while (i < tokens.length && depth > 0)
{
if (tokens[i].type == tok!"(")
if (tokens[i].value == TOK.leftParenthesis)
depth++;
else if (tokens[i].type == tok!")")
else if (tokens[i].value == TOK.rightParenthesis)
depth--;
length += tokenLength(tokens[i]);
i++;
@ -32,163 +32,208 @@ do
return length;
}
/**
 * Returns the length attributed to token `t`, or `INVALID_TOKEN_LENGTH`
 * when the token kind is not handled by the switch below.
 *
 * Keywords are answered before the switch. Literal and identifier kinds
 * return a plain length, the "spaced operator" kinds return that length
 * plus one, and string tokens are measured only up to their first '\n'.
 *
 * NOTE(review): this listing shows the removed libdparse lines
 * (`tok!"..."`, `t.type`, `t.text`) next to their dmd replacements
 * (`TOK....`, `t.value`).
 *
 * NOTE(review): the replacement lines measure `Token.toString(t.value)`,
 * which takes only the token kind. For identifiers, literals and strings
 * that presumably yields the kind's fixed spelling rather than this
 * token's source text (which the removed `t.text` lines measured) --
 * confirm against dmd.tokens before relying on these lengths.
 */
int tokenLength(ref const Token t) pure @safe @nogc
int tokenLength(ref const Token t) @safe
{
import std.algorithm : countUntil;
// Keywords short-circuit the switch: their length is that of the kind's spelling.
if (t.isKeyword())
return cast(int) Token.toString(t.value).length;
// Index of the first newline inside a string token (-1 when there is none).
int c;
switch (t.type)
switch (t.value)
{
case tok!"doubleLiteral":
case tok!"floatLiteral":
case tok!"idoubleLiteral":
case tok!"ifloatLiteral":
case tok!"intLiteral":
case tok!"longLiteral":
case tok!"realLiteral":
case tok!"irealLiteral":
case tok!"uintLiteral":
case tok!"ulongLiteral":
case tok!"characterLiteral":
return cast(int) t.text.length;
case tok!"identifier":
case tok!"stringLiteral":
case tok!"wstringLiteral":
case tok!"dstringLiteral":
// Numeric literals
case TOK.int32Literal:
case TOK.uns32Literal:
case TOK.int64Literal:
case TOK.uns64Literal:
case TOK.int128Literal:
case TOK.uns128Literal:
case TOK.float32Literal:
case TOK.float64Literal:
case TOK.float80Literal:
case TOK.imaginary32Literal:
case TOK.imaginary64Literal:
case TOK.imaginary80Literal:
// Char constants
case TOK.charLiteral:
case TOK.wcharLiteral:
case TOK.dcharLiteral:
// Identifiers
case TOK.identifier:
return cast(int) Token.toString(t.value).length;
// Spaced operators: counted one character longer than their spelling
// (presumably for the space emitted next to them -- confirm with the formatter).
case TOK.add:
case TOK.addAssign:
case TOK.and:
case TOK.andAnd:
case TOK.andAssign:
case TOK.arrow:
case TOK.assign:
case TOK.colon:
case TOK.colonColon:
case TOK.comma:
case TOK.concatenateAssign:
case TOK.div:
case TOK.divAssign:
case TOK.dot:
case TOK.dotDotDot:
case TOK.equal:
case TOK.goesTo:
case TOK.greaterOrEqual:
case TOK.greaterThan:
case TOK.identity:
case TOK.is_:
case TOK.leftShift:
case TOK.leftShiftAssign:
case TOK.lessOrEqual:
case TOK.lessThan:
case TOK.min:
case TOK.minAssign:
case TOK.minusMinus:
case TOK.mod:
case TOK.modAssign:
case TOK.mul:
case TOK.mulAssign:
case TOK.not:
case TOK.notEqual:
case TOK.notIdentity:
case TOK.or:
case TOK.orAssign:
case TOK.orOr:
case TOK.plusPlus:
case TOK.pound:
case TOK.pow:
case TOK.powAssign:
case TOK.question:
case TOK.rightShift:
case TOK.rightShiftAssign:
case TOK.semicolon:
case TOK.slice:
case TOK.tilde:
case TOK.unsignedRightShift:
case TOK.unsignedRightShiftAssign:
case TOK.xor:
case TOK.xorAssign:
return cast(int) Token.toString(t.value).length + 1;
// String tokens: only the part before the first line feed is counted.
case TOK.string_:
// TODO: Unicode line breaks and old-Mac line endings
c = cast(int) t.text.countUntil('\n');
c = cast(int) Token.toString(t.value).countUntil('\n');
// No newline found: the whole string counts.
if (c == -1)
return cast(int) t.text.length;
return cast(int) Token.toString(t.value).length;
else
return c;
mixin(generateFixedLengthCases());
// Any kind not listed above has no known length.
default:
return INVALID_TOKEN_LENGTH;
}
}
/**
 * Reports whether `t` is one of the token kinds listed below as a break
 * token.
 *
 * Every kind accepted here must also be priced by `breakCost` (i.e. not
 * fall into its `default` of 1000); the unittest in this module asserts
 * exactly that.
 *
 * Params:
 *     t = the token kind to classify
 * Returns: `true` for the listed operator/punctuation kinds, `false`
 *     for everything else.
 */
bool isBreakToken(IdType t) pure nothrow @safe @nogc
bool isBreakToken(TOK t) pure nothrow @safe @nogc
{
switch (t)
{
case tok!"||":
case tok!"&&":
case tok!"(":
case tok!"[":
case tok!",":
case tok!":":
case tok!";":
case tok!"^^":
case tok!"^=":
case tok!"^":
case tok!"~=":
case tok!"<<=":
case tok!"<<":
case tok!"<=":
case tok!"<>=":
case tok!"<>":
case tok!"<":
case tok!"==":
case tok!"=>":
case tok!"=":
case tok!">=":
case tok!">>=":
case tok!">>>=":
case tok!">>>":
case tok!">>":
case tok!">":
case tok!"|=":
case tok!"|":
case tok!"-=":
case tok!"!<=":
case tok!"!<>=":
case tok!"!<>":
case tok!"!<":
case tok!"!=":
case tok!"!>=":
case tok!"!>":
case tok!"?":
case tok!"/=":
case tok!"/":
case tok!"..":
case tok!"*=":
case tok!"*":
case tok!"&=":
case tok!"%=":
case tok!"%":
case tok!"+=":
case tok!".":
case tok!"~":
case tok!"+":
case tok!"-":
case TOK.orOr:
case TOK.andAnd:
case TOK.leftParenthesis:
case TOK.leftBracket:
case TOK.comma:
case TOK.colon:
case TOK.semicolon:
case TOK.pow:
// `^=` is xorAssign; it was mistranslated as TOK.powAssign (`^^=`), which
// the removed list never contained and which breakCost does not price.
case TOK.xorAssign:
case TOK.xor:
case TOK.concatenateAssign:
case TOK.leftShiftAssign:
case TOK.leftShift:
case TOK.lessOrEqual:
case TOK.lessThan:
case TOK.equal:
case TOK.goesTo:
case TOK.assign:
case TOK.greaterOrEqual:
case TOK.rightShiftAssign:
case TOK.unsignedRightShift:
case TOK.unsignedRightShiftAssign:
case TOK.rightShift:
case TOK.greaterThan:
case TOK.orAssign:
case TOK.or:
case TOK.minAssign:
case TOK.notEqual:
case TOK.question:
case TOK.divAssign:
case TOK.div:
case TOK.slice:
case TOK.mulAssign:
case TOK.mul:
case TOK.andAssign:
case TOK.modAssign:
case TOK.mod:
case TOK.addAssign:
case TOK.dot:
case TOK.tilde:
case TOK.add:
case TOK.min:
return true;
default:
return false;
}
}
/**
 * Returns the cost assigned to the token kind `c`, in one case adjusted by
 * the kind `p`.
 *
 * Costs handed out: 0 for `||`, `&&`, `,` and `?`; 60 for `(`; 300 for `[`;
 * 200 for the remaining binary/assignment operators and `;`; 0 or 300 for
 * `:` and `.` depending on `p`; 1000 for any kind not listed.
 *
 * Params:
 *     p = token kind consulted for the `:` and `.` cases (presumably the
 *         token preceding `c` -- confirm with the caller)
 *     c = token kind being priced
 * Returns: one of 0, 60, 200, 300 or 1000 as described above.
 */
int breakCost(IdType p, IdType c) pure nothrow @safe @nogc
int breakCost(TOK p, TOK c) pure nothrow @safe @nogc
{
switch (c)
{
case tok!"||":
case tok!"&&":
case tok!",":
case tok!"?":
case TOK.orOr:
case TOK.andAnd:
case TOK.comma:
case TOK.question:
return 0;
case tok!"(":
case TOK.leftParenthesis:
return 60;
case tok!"[":
case TOK.leftBracket:
return 300;
case tok!";":
case tok!"^^":
case tok!"^=":
case tok!"^":
case tok!"~=":
case tok!"<<=":
case tok!"<<":
case tok!"<=":
case tok!"<>=":
case tok!"<>":
case tok!"<":
case tok!"==":
case tok!"=>":
case tok!"=":
case tok!">=":
case tok!">>=":
case tok!">>>=":
case tok!">>>":
case tok!">>":
case tok!">":
case tok!"|=":
case tok!"|":
case tok!"-=":
case tok!"!<=":
case tok!"!<>=":
case tok!"!<>":
case tok!"!<":
case tok!"!=":
case tok!"!>=":
case tok!"!>":
case tok!"/=":
case tok!"/":
case tok!"..":
case tok!"*=":
case tok!"*":
case tok!"&=":
case tok!"%=":
case tok!"%":
case tok!"+":
case tok!"-":
case tok!"~":
case tok!"+=":
case TOK.semicolon:
case TOK.pow:
case TOK.xorAssign:
case TOK.xor:
case TOK.concatenateAssign:
case TOK.leftShiftAssign:
case TOK.leftShift:
case TOK.lessOrEqual:
case TOK.lessThan:
case TOK.equal:
case TOK.goesTo:
case TOK.assign:
case TOK.greaterOrEqual:
case TOK.rightShiftAssign:
case TOK.unsignedRightShiftAssign:
case TOK.unsignedRightShift:
case TOK.rightShift:
case TOK.greaterThan:
case TOK.orAssign:
case TOK.or:
case TOK.minAssign:
// `!=` was priced at 200 in the removed list and is accepted by
// isBreakToken; without this case it would fall through to 1000.
case TOK.notEqual:
case TOK.divAssign:
case TOK.div:
case TOK.slice:
case TOK.mulAssign:
case TOK.mul:
case TOK.andAssign:
case TOK.modAssign:
case TOK.mod:
case TOK.add:
case TOK.min:
case TOK.tilde:
case TOK.addAssign:
return 200;
case tok!":":
case TOK.colon:
// colon could be after a label or an import, where it should normally wrap like before
// for everything else (associative arrays) try not breaking around colons
return p == tok!"identifier" ? 0 : 300;
case tok!".":
return p == tok!")" ? 0 : 300;
return p == TOK.identifier ? 0 : 300;
case TOK.dot:
// A dot directly after `)` costs nothing; any other dot costs 300.
return p == TOK.rightParenthesis ? 0 : 300;
// Kinds that are not break tokens get the highest cost.
default:
return 1000;
}
@ -198,46 +243,5 @@ pure nothrow @safe @nogc unittest
{
foreach (ubyte u; 0 .. ubyte.max)
if (isBreakToken(u))
assert(breakCost(tok!".", u) != 1000);
}
private string generateFixedLengthCases()
{
import std.algorithm : map;
import std.string : format;
import std.array : join;
assert(__ctfe);
string[] spacedOperatorTokens = [
",", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=",
"!>", "!>=", "%", "%=", "&", "&&", "&=", "*", "*=", "+", "+=", "-",
"-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=", "==", "=>",
">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "^", "^=", "^^",
"^^=", "|", "|=", "||", "~", "~="
];
immutable spacedOperatorTokenCases = spacedOperatorTokens.map!(
a => format(`case tok!"%s": return %d + 1;`, a, a.length)).join("\n\t");
string[] identifierTokens = [
"abstract", "alias", "align", "asm", "assert", "auto", "bool",
"break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class",
"const", "continue", "creal", "dchar", "debug", "default", "delegate", "delete", "deprecated",
"do", "double", "else", "enum", "export", "extern", "false", "final", "finally", "float",
"for", "foreach", "foreach_reverse", "function", "goto", "idouble", "if", "ifloat", "immutable",
"import", "in", "inout", "int", "interface", "invariant", "ireal", "is",
"lazy", "long", "macro", "mixin", "module", "new", "nothrow", "null", "out", "override",
"package", "pragma", "private", "protected", "public", "pure", "real", "ref", "return", "scope",
"shared", "short", "static", "struct", "super", "switch", "synchronized", "template", "this",
"throw", "true", "try", "typedef", "typeid", "typeof", "ubyte", "ucent", "uint", "ulong",
"union", "unittest", "ushort", "version", "void", "wchar",
"while", "with", "__DATE__", "__EOF__", "__FILE__",
"__FUNCTION__", "__gshared", "__LINE__", "__MODULE__", "__parameters",
"__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__",
"__traits", "__vector", "__VENDOR__", "__VERSION__", "$", "++", "--",
".", "[", "]", "(", ")", "{", "}"
];
immutable identifierTokenCases = identifierTokens.map!(
a => format(`case tok!"%s": return %d;`, a, a.length)).join("\n\t");
return spacedOperatorTokenCases ~ identifierTokenCases;
assert(breakCost(TOK.dot, u) != 1000);
}