Lex various number literals differently. Fix a few bugs

This commit is contained in:
Hackerpilot 2012-04-22 18:58:35 -07:00
parent d42cf075f5
commit e7c198cbc7
7 changed files with 273 additions and 194 deletions

View File

@ -20,8 +20,10 @@ well as any paths specified in /etc/dmd.conf.
code file. code file.
# Dot Completion # Dot Completion
This is currently under development.
# Paren Completion # Paren Completion
This is currently under development.
# JSON output # JSON output
Generates a JSON summary of the input file. Generates a JSON summary of the input file.

View File

@ -1,4 +1,3 @@
// Copyright Brian Schott (Sir Alaran) 2012. // Copyright Brian Schott (Sir Alaran) 2012.
// Distributed under the Boost Software License, Version 1.0. // Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at // (See accompanying file LICENSE_1_0.txt or copy at
@ -28,100 +27,19 @@ immutable string[] versions = ["AIX", "all", "Alpha", "ARM", "BigEndian", "BSD",
"Win64", "Windows", "X86", "X86_64" "Win64", "Windows", "X86", "X86_64"
]; ];
/+/** /**
* Returns: indices into the token array * Returns: indices into the token array
*/ */
Tuple!(size_t, size_t) findEndOfStatement(const Token[] tokens, size_t index, out size_t) size_t findEndOfExpression(const Token[] tokens, size_t index)
{ {
return index;
}+/
string[] callChainBackwards(const Token[] tokens, size_t index)
{
if (index == 0)
return [tokens[index].value];
string[] callChain;
string current;
loop: while(true)
{
switch(tokens[index].type)
{
case TokenType.tThis:
case TokenType.identifier:
case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END:
current = tokens[index].value ~ current;
callChain = current ~ callChain;
current = "";
if (index == 0)
break loop;
else
--index;
if (tokens[index] == TokenType.not)
callChain = callChain[1 .. $];
break;
case TokenType.rBracket:
tokens.skipBrackets(index);
current ~= "[]";
break;
case TokenType.rParen:
tokens.skipParens(index);
break;
case TokenType.not:
case TokenType.dot:
if (index == 0)
break loop;
else
--index;
break;
default:
break loop;
}
}
return callChain;
} }
size_t findBeginningOfExpression(const Token[] tokens, size_t index)
string[] callChainForwards(const Token[] tokens, size_t index)
{ {
string[] callChain; return index;
while (index < tokens.length)
{
switch(tokens[index].type)
{
case TokenType.tNew:
++index;
break;
case TokenType.tThis:
case TokenType.identifier:
case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END:
callChain ~= tokens[index++].value;
break;
case TokenType.lParen:
tokens.skipParens(index);
break;
case TokenType.lBracket:
tokens.skipBrackets(index);
callChain[$ - 1] ~= "[i]";
break;
case TokenType.not:
++index;
if (tokens.startsWith(TokenType.lParen))
tokens.skipParens(index);
else
++index;
break;
default:
break;
}
if (index >= tokens.length || tokens[index] != TokenType.dot)
break;
else
++index;
}
return callChain;
} }
struct AutoComplete struct AutoComplete
{ {
this(const (Token)[] tokens, CompletionContext context) this(const (Token)[] tokens, CompletionContext context)
@ -130,39 +48,42 @@ struct AutoComplete
this.context = context; this.context = context;
} }
string getTypeOfExpression(string[] chain, const Token[] tokens, size_t cursor) string getTypeOfExpression(const(Token)[] expression, const Token[] tokens, size_t cursor)
{ {
if (chain.length == 0) return "void";
return "void";
auto type = typeOfVariable(chain[0], cursor);
if (type == "void")
return type;
chain = chain[1 .. $];
while (chain.length >= 1)
{
auto typeMap = context.getMembersOfType(type);
if (typeMap is null)
return "void";
auto memberType = typeMap[chain[0]][0];
if (memberType is null)
return "void";
type = memberType;
chain = chain[1 .. $];
}
return type;
} }
/** /**
* This is where the magic happens * This is where the magic happens
*/ */
string typeOfVariable(string symbol, size_t cursor) string typeOfVariable(Token symbol, size_t cursor)
{ {
// int is of type int, double of type double, and so on // int is of type int, double of type double, and so on
if (symbol in typeProperties) if (symbol.value in typeProperties)
return symbol; return symbol.value;
if (context.getMembersOfType(symbol)) switch (symbol.type)
return symbol; {
case TokenType.floatLiteral:
return "float";
case TokenType.doubleLiteral:
return "double";
case TokenType.realLiteral:
return "real";
case TokenType.intLiteral:
return "int";
case TokenType.unsignedIntLiteral:
return "uint";
case TokenType.longLiteral:
return "long";
case TokenType.unsignedLongLiteral:
return "ulong";
default:
break;
}
if (context.getMembersOfType(symbol.value))
return symbol.value;
// Arbitrarily define the depth of the cursor position as zero // Arbitrarily define the depth of the cursor position as zero
// iterate backwards through the code to try to find the variable // iterate backwards through the code to try to find the variable
@ -183,14 +104,13 @@ struct AutoComplete
|| p == TokenType.tConst) || p == TokenType.tConst)
&& preceedingTokens[index + 1] == TokenType.assign) && preceedingTokens[index + 1] == TokenType.assign)
{ {
auto chain = callChainForwards(tokens, index + 2); return null;
return getTypeOfExpression(chain, tokens, cursor);
} }
if (p == TokenType.identifier else if (p == TokenType.identifier
|| (p.type > TokenType.TYPES_BEGIN || (p.type > TokenType.TYPES_BEGIN
&& p.type < TokenType.TYPES_END)) && p.type < TokenType.TYPES_END))
{ {
return preceedingTokens[index - 1].value; return p.value;
} }
} }
if (index == 0) if (index == 0)
@ -207,7 +127,7 @@ struct AutoComplete
return minCount!("a.bodyStart > b.bodyStart")(structs)[0].name; return minCount!("a.bodyStart > b.bodyStart")(structs)[0].name;
foreach (s; structs) foreach (s; structs)
{ {
auto t = s.getMemberType(symbol); auto t = s.getMemberType(symbol.value);
if (t !is null) if (t !is null)
return t; return t;
} }
@ -225,14 +145,16 @@ struct AutoComplete
string parenComplete(size_t cursor) string parenComplete(size_t cursor)
{ {
stderr.writeln("parenComplete");
auto index = assumeSorted(tokens).lowerBound(cursor).length - 2; auto index = assumeSorted(tokens).lowerBound(cursor).length - 2;
Token t = tokens[index]; Token t = tokens[index];
stderr.writeln(t);
if (t.startIndex + t.value.length + 1 != cursor) if (t.startIndex + t.value.length + 1 != cursor)
return ""; return "";
switch (tokens[index].type) switch (tokens[index].type)
{ {
case TokenType.tVersion: case TokenType.tVersion:
return to!string(array(join(map!`a ~ "?1"`(versions), " "))); return to!string(join(map!`a ~ "?1"`(versions), " ").array());
case TokenType.tIf: case TokenType.tIf:
case TokenType.tCast: case TokenType.tCast:
case TokenType.tWhile: case TokenType.tWhile:
@ -251,20 +173,7 @@ struct AutoComplete
Token t = tokens[index]; Token t = tokens[index];
if (t.startIndex + t.value.length + 1 != cursor) if (t.startIndex + t.value.length + 1 != cursor)
return ""; return "";
stderr.writeln(t); auto type = typeOfVariable(t, cursor);
string[] chain = callChainBackwards(tokens, index);
auto type = getTypeOfExpression(chain, tokens, cursor);
if (type && type in typeProperties)
{
string r;
foreach (i, prop; typeProperties[type])
if (i == typeProperties.length)
r = r ~ prop;
else
r = r ~ prop ~ " ";
return r;
}
const Tuple!(string, string)[string] typeMap = context.getMembersOfType(type); const Tuple!(string, string)[string] typeMap = context.getMembersOfType(type);
if (typeMap is null) if (typeMap is null)
@ -272,7 +181,7 @@ struct AutoComplete
auto app = appender!(string[])(); auto app = appender!(string[])();
foreach (k, t; typeMap) foreach (k, t; typeMap)
app.put(k ~ t[1]); app.put(k ~ t[1]);
return to!string(array(join(sort(app.data), " "))); return to!string(array(join(sort!"a.toLower() < b.toLower()"(app.data), " ")));
} }
const(Token)[] tokens; const(Token)[] tokens;

View File

@ -1,2 +1,2 @@
dmd *.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner dmd *.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner
#dmd *.d -g -unittest -m64 -w -wi -property -oftokenizer #dmd *.d -g -unittest -m64 -w -wi -property -ofdscanner

View File

@ -30,7 +30,7 @@ html { background-color: #111; color: #ccc; }
.string { color: Tomato; font-style: italic; } .string { color: Tomato; font-style: italic; }
.property { color: HotPink; font-weight: bold;} .property { color: HotPink; font-weight: bold;}
.operator { color: tan; font-weight: bold; } .operator { color: tan; font-weight: bold; }
.type { color: cyan; } .type { color: cyan; font-weight: bold; }
</style> </style>
<pre>]"); <pre>]");
@ -50,7 +50,7 @@ html { background-color: #111; color: #ccc; }
case TokenType.stringLiteral: case TokenType.stringLiteral:
writeSpan("string", t.value); writeSpan("string", t.value);
break; break;
case TokenType.numberLiteral: case TokenType.NUMBERS_BEGIN: .. case TokenType.NUMBERS_END:
writeSpan("number", t.value); writeSpan("number", t.value);
break; break;
case TokenType.OPERATORS_BEGIN: .. case TokenType.OPERATORS_END: case TokenType.OPERATORS_BEGIN: .. case TokenType.OPERATORS_END:

View File

@ -254,11 +254,19 @@ enum TokenType: uint
// Misc // Misc
MISC_BEGIN, MISC_BEGIN,
comment, /// /** comment */ or // comment or ///comment comment, /// /** comment */ or // comment or ///comment
NUMBERS_BEGIN,
floatLiteral, /// 123.456f or 0x123_45p-af
doubleLiteral, /// 123.456
realLiteral, /// 123.456L
intLiteral, /// 123 or 0b1101010101
unsignedIntLiteral, /// 123u
longLiteral, /// 123L
unsignedLongLiteral, /// 123uL
NUMBERS_END,
stringLiteral, /// "a string" stringLiteral, /// "a string"
numberLiteral, /// int, float, etc... identifier, /// anything else
identifier,
whitespace, /// whitespace whitespace, /// whitespace
blank, blank, /// unknown token type
MISC_END, MISC_END,
} }

View File

@ -218,7 +218,6 @@ string parseTypeDeclaration(const Token[] tokens, ref size_t index)
break buildingType; break buildingType;
} }
} }
stderr.writeln("type = ", type);
return type; return type;
} }

View File

@ -141,11 +141,15 @@ body
++endIndex; ++endIndex;
} }
++endIndex; ++endIndex;
if (endIndex < inputString.length && (inputString[endIndex] == 'w'
|| inputString[endIndex] == 'd' || inputString[endIndex] == 'c'))
{
++endIndex;
}
endIndex = min(endIndex, inputString.length); endIndex = min(endIndex, inputString.length);
return inputString[startIndex .. endIndex]; return inputString[startIndex .. endIndex];
} }
/** /**
* Lexes the various crazy D string literals such as q{}, q"WTF is this? WTF", * Lexes the various crazy D string literals such as q{}, q"WTF is this? WTF",
* and q"<>". * and q"<>".
@ -218,79 +222,230 @@ string lexTokenString(S)(ref S inputString, ref size_t endIndex, ref uint lineNu
return ""; return "";
} }
/** pure nothrow Token lexNumber(S)(ref S inputString, ref size_t endIndex)
* if (isSomeString!S)
*/
pure nothrow string lexNumber(S)(ref S inputString, ref size_t endIndex) if (isSomeString!S)
{ {
auto startIndex = endIndex; Token token;
bool foundDot = false; size_t startIndex = endIndex;
bool foundX = false; if (inputString[endIndex] == '0')
bool foundB = false; {
bool foundE = false; endIndex++;
numberLoop: while (endIndex < inputString.length) if (endIndex >= inputString.length)
{
token.type = TokenType.intLiteral;
token.value = inputString[startIndex .. endIndex];
return token;
}
switch (inputString[endIndex])
{
case '0': .. case '9':
// The current language spec doesn't cover octal literals, so this
// is decimal.
lexDecimal(inputString, startIndex, endIndex, token);
return token;
case 'b':
case 'B':
lexBinary(inputString, startIndex, ++endIndex, token);
return token;
case 'x':
case 'X':
lexHex(inputString, startIndex, ++endIndex, token);
return token;
default:
token.type = TokenType.intLiteral;
token.value = inputString[startIndex .. endIndex];
return token;
}
}
else
{
lexDecimal(inputString, startIndex, endIndex, token);
return token;
}
}
pure nothrow void lexBinary(S)(ref S inputString, size_t startIndex,
ref size_t endIndex, ref Token token) if (isSomeString!S)
{
bool lexingSuffix = false;
bool isLong = false;
bool isUnsigned = false;
token.type = TokenType.intLiteral;
binaryLoop: while (endIndex < inputString.length)
{ {
switch (inputString[endIndex]) switch (inputString[endIndex])
{ {
case '0': case '0':
if (!foundX) case '1':
{ case '_':
++endIndex; ++endIndex;
if (endIndex < inputString.length if (lexingSuffix)
&& (inputString[endIndex] == 'x' || inputString[endIndex] == 'X')) break binaryLoop;
{ break;
++endIndex; case 'u':
foundX = true; case 'U':
} ++endIndex;
} lexingSuffix = true;
if (isLong)
token.type = TokenType.unsignedLongLiteral;
else else
++endIndex; token.type = TokenType.unsignedIntLiteral;
break; break;
case 'b': case 'L':
if (foundB)
break numberLoop;
foundB = true;
++endIndex; ++endIndex;
if (isLong)
break binaryLoop;
if (isUnsigned)
token.type = TokenType.unsignedLongLiteral;
else
token.type = TokenType.longLiteral;
isLong = true;
break; break;
case '.': default:
if (foundDot || foundX || foundE) break binaryLoop;
break numberLoop; }
foundDot = true; }
token.value = inputString[startIndex .. endIndex];
}
pure nothrow void lexDecimal(S)(ref S inputString, size_t startIndex,
ref size_t endIndex, ref Token token) if (isSomeString!S)
{
bool lexingSuffix = false;
bool isLong = false;
bool isUnsigned = false;
bool isFloat = false;
bool isReal = false;
bool isDouble = false;
bool foundDot = false;
bool foundE = false;
bool foundPlusMinus = false;
token.type = TokenType.intLiteral;
decimalLoop: while (endIndex < inputString.length)
{
switch (inputString[endIndex])
{
case '0': .. case '9':
case '_':
++endIndex; ++endIndex;
if (lexingSuffix)
break decimalLoop;
break;
case 'e':
case 'E':
if (foundE)
break decimalLoop;
++endIndex;
foundE = true;
break; break;
case '+': case '+':
case '-': case '-':
if (!foundE) if (foundPlusMinus || !foundE)
break numberLoop; break decimalLoop;
foundPlusMinus = true;
++endIndex; ++endIndex;
break; break;
case '.':
if (foundDot)
break decimalLoop;
++endIndex;
foundDot = true;
token.type = TokenType.doubleLiteral;
isDouble = true;
break;
case 'u':
case 'U':
++endIndex;
lexingSuffix = true;
if (isLong)
token.type = TokenType.unsignedLongLiteral;
else
token.type = TokenType.unsignedIntLiteral;
isUnsigned = true;
break;
case 'L':
++endIndex;
lexingSuffix = true;
if (isLong || isReal)
break decimalLoop;
if (isDouble)
token.type = TokenType.realLiteral;
else if (isUnsigned)
token.type = TokenType.unsignedLongLiteral;
else
token.type = TokenType.longLiteral;
isLong = true;
break;
case 'f':
case 'F':
lexingSuffix = true;
if (isUnsigned || isLong)
break decimalLoop;
++endIndex;
token.type = TokenType.floatLiteral;
break decimalLoop;
default:
break decimalLoop;
}
}
token.value = inputString[startIndex .. endIndex];
}
nothrow void lexHex(S)(ref S inputString, ref size_t startIndex,
ref size_t endIndex, ref Token token) if (isSomeString!S)
{
bool lexingSuffix = false;
bool isLong = false;
bool isUnsigned = false;
bool isFloat = false;
bool isReal = false;
bool isDouble = false;
bool foundDot = false;
bool foundE = false;
bool foundPlusMinus = false;
token.type = TokenType.intLiteral;
hexLoop: while (endIndex < inputString.length)
{
switch (inputString[endIndex])
{
case '0': .. case '9':
case 'a': .. case 'f':
case 'A': .. case 'F':
case '_':
++endIndex;
if (lexingSuffix)
break hexLoop;
break;
case 'p': case 'p':
case 'P': case 'P':
if (!foundX) if (foundE)
break numberLoop; break hexLoop;
++endIndex;
foundE = true; foundE = true;
goto case '_'; break;
case 'e': case '+':
case 'E': case '-':
if (foundE || foundX) if (foundPlusMinus || !foundE)
break numberLoop; break hexLoop;
foundE = true; foundPlusMinus = true;
goto case '_';
case '1': .. case '9':
case '_':
++endIndex; ++endIndex;
break; break;
case 'F': case '.':
case 'f': if (foundDot)
case 'L': break hexLoop;
case 'i':
++endIndex; ++endIndex;
break numberLoop; foundDot = true;
token.type = TokenType.doubleLiteral;
isDouble = true;
break;
default: default:
break numberLoop; break hexLoop;
} }
} }
return inputString[startIndex .. endIndex];
token.value = inputString[startIndex .. endIndex];
} }
@ -337,8 +492,10 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl
size_t endIndex = 0; size_t endIndex = 0;
uint lineNumber = 1; uint lineNumber = 1;
while (endIndex < inputString.length) while (endIndex < inputString.length)
{ {
size_t prevIndex = endIndex;
Token currentToken; Token currentToken;
auto startIndex = endIndex; auto startIndex = endIndex;
if (isWhite(inputString[endIndex])) if (isWhite(inputString[endIndex]))
@ -421,11 +578,8 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl
"^", "TokenType.xor", "^", "TokenType.xor",
"^=", "TokenType.xorEquals", "^=", "TokenType.xorEquals",
)); ));
case '0': .. case '9': case '0': .. case '9':
currentToken.value = lexNumber(inputString, endIndex); currentToken = lexNumber(inputString, endIndex);
currentToken.type = TokenType.numberLiteral;
currentToken.lineNumber = lineNumber;
break; break;
case '/': case '/':
++endIndex; ++endIndex;
@ -528,8 +682,15 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl
currentToken.lineNumber = lineNumber; currentToken.lineNumber = lineNumber;
break; break;
} }
// writeln(currentToken); //stderr.writeln(currentToken);
tokenAppender.put(currentToken); tokenAppender.put(currentToken);
// This should never happen.
if (endIndex <= prevIndex)
{
stderr.writeln("FAIL");
return [];
}
} }
return tokenAppender.data; return tokenAppender.data;
} }