Bug fixes for character literals and escape sequences

This commit is contained in:
Hackerpilot 2013-02-08 06:10:18 -08:00
parent c904bad110
commit 61704db501
3 changed files with 105 additions and 41 deletions

View File

@ -1,3 +1,4 @@
#dmd *.d std/d/*.d -release -inline -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner #-unittest
ldc2 -O5 *.d std/d/*.d -of=dscanner -release -vectorize -m64
#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -unittest
#ldc2 -O3 *.d std/d/*.d -of=dscanner -release -vectorize -m64
ldc2 *.d std/d/*.d -of=dscanner -unittest -m64 -g

View File

@ -47,7 +47,7 @@ html { background-color: #fdf6e3; color: #002b36; }
writeSpan("kwrd", t.value);
else if (t.type == TokenType.comment)
writeSpan("com", t.value);
else if (isStringLiteral(t.type))
else if (isStringLiteral(t.type) || t.type == TokenType.characterLiteral)
writeSpan("str", t.value);
else if (isNumberLiteral(t.type))
writeSpan("num", t.value);

View File

@ -558,6 +558,8 @@ private:
lexNumber();
return;
case '\'':
lexCharacterLiteral();
return;
case '"':
case '`':
lexString();
@ -959,6 +961,16 @@ private:
case '_':
keepNonNewlineChar();
break;
case 'u':
case 'U':
if (foundDot)
{
errorMessage("Floating-point literal cannot have %s suffix".format(
cast(char) currentElement()));
return;
}
else
lexIntSuffix();
case 'i':
case 'L':
if (foundDot)
@ -1118,7 +1130,7 @@ private:
errorMessage("Unterminated character literal");
return;
}
sw: switch (currentElement())
switch (currentElement())
{
case '\'':
return;
@ -1126,8 +1138,17 @@ private:
lexEscapeSequence();
break;
default:
keepChar();
break;
if (currentElement() & 0x80)
{
while (currentElement() & 0x80)
keepChar();
break;
}
else
{
keepChar();
break;
}
}
if (currentElement() != '\'')
{
@ -1235,22 +1256,27 @@ private:
return;
case 'u':
case 'U':
uint digits = currentElement == 'u' ? 4 : 8;
keepChar();
foreach (i; 0 .. 2)
foreach (i; 0 .. digits)
{
foreach (j; 0 .. 4)
{
if (!isHexDigit(currentElement()))
{
errorMessage("Expected hex digit");
return;
}
keepChar();
}
if (!isHexDigit(currentElement()))
break;
}
if (!isHexDigit(currentElement()))
{
errorMessage("Expected hex digit instead of %s".format(
cast(char) currentElement()));
return;
}
keepChar();
}
return;
case '&':
while (!isEoF())
{
keepChar();
if (currentElement() == ';')
break;
}
return;
default:
errorMessage("Invalid escape sequence");
return;
@ -1277,9 +1303,9 @@ private:
case '0': .. case '7':
ubyte[3] digits;
size_t i;
for(; i < 3 && !isEoF(); ++i)
while(i < 3 && !isEoF())
{
digits[i] = currentElement();
digits[i++] = currentElement();
advanceRange();
if (currentElement() < '0' || currentElement() > '7') break;
}
@ -1296,30 +1322,61 @@ private:
return;
}
digits[i] = currentElement();
advanceRange();
}
decodeAndStore(digits, 2, 16);
return;
case 'u':
case 'U':
uint digitCount = currentElement == 'u' ? 4 : 8;
advanceRange();
ubyte[8] digits;
size_t i;
foreach (j; 0 .. 2)
{
foreach (k; 0 .. 4)
{
if (!isHexDigit(currentElement()))
{
errorMessage("Expected hex digit");
return;
}
digits[i++] = currentElement();
}
if (!isHexDigit(currentElement()))
break;
}
decodeAndStore(digits, i, 16);
foreach (i; 0 .. digitCount)
{
if (!isHexDigit(currentElement()))
{
errorMessage("Expected hex digit");
return;
}
digits[i] = currentElement();
advanceRange();
}
decodeAndStore(digits, digitCount, 16);
return;
case '&':
advanceRange();
ubyte[] b;
while (!isEoF())
{
if (isAlpha(currentElement()))
{
b ~= currentElement();
advanceRange();
}
else if (currentElement() == ';')
{
advanceRange();
break;
}
else
{
errorMessage("Invalid character entity");
return;
}
}
auto entity = (cast(string) b) in characterEntities;
if (entity is null)
{
errorMessage("Invalid character entity \"&%s;\"".format(
cast(char[]) b));
return;
}
else
{
for (size_t i = 0; i < (*entity).length; i++)
bufferChar(cast(ubyte) (*entity)[i]);
}
return;
default:
errorMessage("Invalid escape sequence");
return;
@ -1329,18 +1386,24 @@ private:
void decodeAndStore(ubyte[] digits, size_t maxIndex, uint base)
{
scope(failure)
{
import std.stdio;
stderr.writeln("Failed on line ", lineNumber, " of file ",
config.fileName);
}
char[4] codeUnits;
auto source = cast(char[]) digits[0 .. maxIndex + 1];
auto source = cast(char[]) digits[0 .. maxIndex];
uint codePoint = parse!uint(source, base);
ulong unitCount = encode(codeUnits, codePoint);
foreach (i; 0 .. unitCount)
bufferChar(codeUnits[unitCount]);
bufferChar(codeUnits[i]);
}
void lexDelimitedString()
in
{
assert(currentElement() == 'q');
assert(currentElement() == '"');
}
body
{
@ -1369,7 +1432,7 @@ private:
void lexNormalDelimitedString(ubyte open, ubyte close)
in
{
assert(currentElement() == '"');
assert(buffer[0 .. 2] == `q"`);
}
body
{