Bug fixes for character literals and escape sequences
This commit is contained in:
parent
c904bad110
commit
61704db501
5
build.sh
5
build.sh
|
@ -1,3 +1,4 @@
|
|||
#dmd *.d std/d/*.d -release -inline -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
|
||||
#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner #-unittest
|
||||
ldc2 -O5 *.d std/d/*.d -of=dscanner -release -vectorize -m64
|
||||
#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -unittest
|
||||
#ldc2 -O3 *.d std/d/*.d -of=dscanner -release -vectorize -m64
|
||||
ldc2 *.d std/d/*.d -of=dscanner -unittest -m64 -g
|
||||
|
|
|
@ -47,7 +47,7 @@ html { background-color: #fdf6e3; color: #002b36; }
|
|||
writeSpan("kwrd", t.value);
|
||||
else if (t.type == TokenType.comment)
|
||||
writeSpan("com", t.value);
|
||||
else if (isStringLiteral(t.type))
|
||||
else if (isStringLiteral(t.type) || t.type == TokenType.characterLiteral)
|
||||
writeSpan("str", t.value);
|
||||
else if (isNumberLiteral(t.type))
|
||||
writeSpan("num", t.value);
|
||||
|
|
101
std/d/lexer.d
101
std/d/lexer.d
|
@ -558,6 +558,8 @@ private:
|
|||
lexNumber();
|
||||
return;
|
||||
case '\'':
|
||||
lexCharacterLiteral();
|
||||
return;
|
||||
case '"':
|
||||
case '`':
|
||||
lexString();
|
||||
|
@ -959,6 +961,16 @@ private:
|
|||
case '_':
|
||||
keepNonNewlineChar();
|
||||
break;
|
||||
case 'u':
|
||||
case 'U':
|
||||
if (foundDot)
|
||||
{
|
||||
errorMessage("Floating-point literal cannot have %s suffix".format(
|
||||
cast(char) currentElement()));
|
||||
return;
|
||||
}
|
||||
else
|
||||
lexIntSuffix();
|
||||
case 'i':
|
||||
case 'L':
|
||||
if (foundDot)
|
||||
|
@ -1118,7 +1130,7 @@ private:
|
|||
errorMessage("Unterminated character literal");
|
||||
return;
|
||||
}
|
||||
sw: switch (currentElement())
|
||||
switch (currentElement())
|
||||
{
|
||||
case '\'':
|
||||
return;
|
||||
|
@ -1126,9 +1138,18 @@ private:
|
|||
lexEscapeSequence();
|
||||
break;
|
||||
default:
|
||||
if (currentElement() & 0x80)
|
||||
{
|
||||
while (currentElement() & 0x80)
|
||||
keepChar();
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
keepChar();
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (currentElement() != '\'')
|
||||
{
|
||||
errorMessage("Expected \"'\" to end character literal");
|
||||
|
@ -1235,19 +1256,24 @@ private:
|
|||
return;
|
||||
case 'u':
|
||||
case 'U':
|
||||
uint digits = currentElement == 'u' ? 4 : 8;
|
||||
keepChar();
|
||||
foreach (i; 0 .. 2)
|
||||
{
|
||||
foreach (j; 0 .. 4)
|
||||
foreach (i; 0 .. digits)
|
||||
{
|
||||
if (!isHexDigit(currentElement()))
|
||||
{
|
||||
errorMessage("Expected hex digit");
|
||||
errorMessage("Expected hex digit instead of %s".format(
|
||||
cast(char) currentElement()));
|
||||
return;
|
||||
}
|
||||
keepChar();
|
||||
}
|
||||
if (!isHexDigit(currentElement()))
|
||||
return;
|
||||
case '&':
|
||||
while (!isEoF())
|
||||
{
|
||||
keepChar();
|
||||
if (currentElement() == ';')
|
||||
break;
|
||||
}
|
||||
return;
|
||||
|
@ -1277,9 +1303,9 @@ private:
|
|||
case '0': .. case '7':
|
||||
ubyte[3] digits;
|
||||
size_t i;
|
||||
for(; i < 3 && !isEoF(); ++i)
|
||||
while(i < 3 && !isEoF())
|
||||
{
|
||||
digits[i] = currentElement();
|
||||
digits[i++] = currentElement();
|
||||
advanceRange();
|
||||
if (currentElement() < '0' || currentElement() > '7') break;
|
||||
}
|
||||
|
@ -1296,29 +1322,60 @@ private:
|
|||
return;
|
||||
}
|
||||
digits[i] = currentElement();
|
||||
advanceRange();
|
||||
}
|
||||
decodeAndStore(digits, 2, 16);
|
||||
return;
|
||||
case 'u':
|
||||
case 'U':
|
||||
uint digitCount = currentElement == 'u' ? 4 : 8;
|
||||
advanceRange();
|
||||
ubyte[8] digits;
|
||||
size_t i;
|
||||
foreach (j; 0 .. 2)
|
||||
{
|
||||
foreach (k; 0 .. 4)
|
||||
foreach (i; 0 .. digitCount)
|
||||
{
|
||||
if (!isHexDigit(currentElement()))
|
||||
{
|
||||
errorMessage("Expected hex digit");
|
||||
return;
|
||||
}
|
||||
digits[i++] = currentElement();
|
||||
digits[i] = currentElement();
|
||||
advanceRange();
|
||||
}
|
||||
if (!isHexDigit(currentElement()))
|
||||
decodeAndStore(digits, digitCount, 16);
|
||||
return;
|
||||
case '&':
|
||||
advanceRange();
|
||||
ubyte[] b;
|
||||
while (!isEoF())
|
||||
{
|
||||
if (isAlpha(currentElement()))
|
||||
{
|
||||
b ~= currentElement();
|
||||
advanceRange();
|
||||
}
|
||||
else if (currentElement() == ';')
|
||||
{
|
||||
advanceRange();
|
||||
break;
|
||||
}
|
||||
decodeAndStore(digits, i, 16);
|
||||
else
|
||||
{
|
||||
errorMessage("Invalid character entity");
|
||||
return;
|
||||
}
|
||||
}
|
||||
auto entity = (cast(string) b) in characterEntities;
|
||||
if (entity is null)
|
||||
{
|
||||
errorMessage("Invalid character entity \"&%s;\"".format(
|
||||
cast(char[]) b));
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < (*entity).length; i++)
|
||||
bufferChar(cast(ubyte) (*entity)[i]);
|
||||
}
|
||||
return;
|
||||
default:
|
||||
errorMessage("Invalid escape sequence");
|
||||
|
@ -1329,18 +1386,24 @@ private:
|
|||
|
||||
void decodeAndStore(ubyte[] digits, size_t maxIndex, uint base)
|
||||
{
|
||||
scope(failure)
|
||||
{
|
||||
import std.stdio;
|
||||
stderr.writeln("Failed on line ", lineNumber, " of file ",
|
||||
config.fileName);
|
||||
}
|
||||
char[4] codeUnits;
|
||||
auto source = cast(char[]) digits[0 .. maxIndex + 1];
|
||||
auto source = cast(char[]) digits[0 .. maxIndex];
|
||||
uint codePoint = parse!uint(source, base);
|
||||
ulong unitCount = encode(codeUnits, codePoint);
|
||||
foreach (i; 0 .. unitCount)
|
||||
bufferChar(codeUnits[unitCount]);
|
||||
bufferChar(codeUnits[i]);
|
||||
}
|
||||
|
||||
void lexDelimitedString()
|
||||
in
|
||||
{
|
||||
assert(currentElement() == 'q');
|
||||
assert(currentElement() == '"');
|
||||
}
|
||||
body
|
||||
{
|
||||
|
@ -1369,7 +1432,7 @@ private:
|
|||
void lexNormalDelimitedString(ubyte open, ubyte close)
|
||||
in
|
||||
{
|
||||
assert(currentElement() == '"');
|
||||
assert(buffer[0 .. 2] == `q"`);
|
||||
}
|
||||
body
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue