Bug fixes for character literals and escape sequences

This commit is contained in:
Hackerpilot 2013-02-08 06:10:18 -08:00
parent c904bad110
commit 61704db501
3 changed files with 105 additions and 41 deletions

View File

@ -1,3 +1,4 @@
#dmd *.d std/d/*.d -release -inline -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline #dmd *.d std/d/*.d -release -inline -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner #-unittest #dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -unittest
ldc2 -O5 *.d std/d/*.d -of=dscanner -release -vectorize -m64 #ldc2 -O3 *.d std/d/*.d -of=dscanner -release -vectorize -m64
ldc2 *.d std/d/*.d -of=dscanner -unittest -m64 -g

View File

@ -47,7 +47,7 @@ html { background-color: #fdf6e3; color: #002b36; }
writeSpan("kwrd", t.value); writeSpan("kwrd", t.value);
else if (t.type == TokenType.comment) else if (t.type == TokenType.comment)
writeSpan("com", t.value); writeSpan("com", t.value);
else if (isStringLiteral(t.type)) else if (isStringLiteral(t.type) || t.type == TokenType.characterLiteral)
writeSpan("str", t.value); writeSpan("str", t.value);
else if (isNumberLiteral(t.type)) else if (isNumberLiteral(t.type))
writeSpan("num", t.value); writeSpan("num", t.value);

View File

@ -558,6 +558,8 @@ private:
lexNumber(); lexNumber();
return; return;
case '\'': case '\'':
lexCharacterLiteral();
return;
case '"': case '"':
case '`': case '`':
lexString(); lexString();
@ -959,6 +961,16 @@ private:
case '_': case '_':
keepNonNewlineChar(); keepNonNewlineChar();
break; break;
case 'u':
case 'U':
if (foundDot)
{
errorMessage("Floating-point literal cannot have %s suffix".format(
cast(char) currentElement()));
return;
}
else
lexIntSuffix();
case 'i': case 'i':
case 'L': case 'L':
if (foundDot) if (foundDot)
@ -1118,7 +1130,7 @@ private:
errorMessage("Unterminated character literal"); errorMessage("Unterminated character literal");
return; return;
} }
sw: switch (currentElement()) switch (currentElement())
{ {
case '\'': case '\'':
return; return;
@ -1126,8 +1138,17 @@ private:
lexEscapeSequence(); lexEscapeSequence();
break; break;
default: default:
keepChar(); if (currentElement() & 0x80)
break; {
while (currentElement() & 0x80)
keepChar();
break;
}
else
{
keepChar();
break;
}
} }
if (currentElement() != '\'') if (currentElement() != '\'')
{ {
@ -1235,22 +1256,27 @@ private:
return; return;
case 'u': case 'u':
case 'U': case 'U':
uint digits = currentElement == 'u' ? 4 : 8;
keepChar(); keepChar();
foreach (i; 0 .. 2) foreach (i; 0 .. digits)
{ {
foreach (j; 0 .. 4) if (!isHexDigit(currentElement()))
{ {
if (!isHexDigit(currentElement())) errorMessage("Expected hex digit instead of %s".format(
{ cast(char) currentElement()));
errorMessage("Expected hex digit"); return;
return; }
} keepChar();
keepChar(); }
}
if (!isHexDigit(currentElement()))
break;
}
return; return;
case '&':
while (!isEoF())
{
keepChar();
if (currentElement() == ';')
break;
}
return;
default: default:
errorMessage("Invalid escape sequence"); errorMessage("Invalid escape sequence");
return; return;
@ -1277,9 +1303,9 @@ private:
case '0': .. case '7': case '0': .. case '7':
ubyte[3] digits; ubyte[3] digits;
size_t i; size_t i;
for(; i < 3 && !isEoF(); ++i) while(i < 3 && !isEoF())
{ {
digits[i] = currentElement(); digits[i++] = currentElement();
advanceRange(); advanceRange();
if (currentElement() < '0' || currentElement() > '7') break; if (currentElement() < '0' || currentElement() > '7') break;
} }
@ -1296,30 +1322,61 @@ private:
return; return;
} }
digits[i] = currentElement(); digits[i] = currentElement();
advanceRange();
} }
decodeAndStore(digits, 2, 16); decodeAndStore(digits, 2, 16);
return; return;
case 'u': case 'u':
case 'U': case 'U':
uint digitCount = currentElement == 'u' ? 4 : 8;
advanceRange(); advanceRange();
ubyte[8] digits; ubyte[8] digits;
size_t i; foreach (i; 0 .. digitCount)
foreach (j; 0 .. 2) {
{ if (!isHexDigit(currentElement()))
foreach (k; 0 .. 4) {
{ errorMessage("Expected hex digit");
if (!isHexDigit(currentElement())) return;
{ }
errorMessage("Expected hex digit"); digits[i] = currentElement();
return; advanceRange();
} }
digits[i++] = currentElement(); decodeAndStore(digits, digitCount, 16);
}
if (!isHexDigit(currentElement()))
break;
}
decodeAndStore(digits, i, 16);
return; return;
case '&':
advanceRange();
ubyte[] b;
while (!isEoF())
{
if (isAlpha(currentElement()))
{
b ~= currentElement();
advanceRange();
}
else if (currentElement() == ';')
{
advanceRange();
break;
}
else
{
errorMessage("Invalid character entity");
return;
}
}
auto entity = (cast(string) b) in characterEntities;
if (entity is null)
{
errorMessage("Invalid character entity \"&%s;\"".format(
cast(char[]) b));
return;
}
else
{
for (size_t i = 0; i < (*entity).length; i++)
bufferChar(cast(ubyte) (*entity)[i]);
}
return;
default: default:
errorMessage("Invalid escape sequence"); errorMessage("Invalid escape sequence");
return; return;
@ -1329,18 +1386,24 @@ private:
void decodeAndStore(ubyte[] digits, size_t maxIndex, uint base) void decodeAndStore(ubyte[] digits, size_t maxIndex, uint base)
{ {
scope(failure)
{
import std.stdio;
stderr.writeln("Failed on line ", lineNumber, " of file ",
config.fileName);
}
char[4] codeUnits; char[4] codeUnits;
auto source = cast(char[]) digits[0 .. maxIndex + 1]; auto source = cast(char[]) digits[0 .. maxIndex];
uint codePoint = parse!uint(source, base); uint codePoint = parse!uint(source, base);
ulong unitCount = encode(codeUnits, codePoint); ulong unitCount = encode(codeUnits, codePoint);
foreach (i; 0 .. unitCount) foreach (i; 0 .. unitCount)
bufferChar(codeUnits[unitCount]); bufferChar(codeUnits[i]);
} }
void lexDelimitedString() void lexDelimitedString()
in in
{ {
assert(currentElement() == 'q'); assert(currentElement() == '"');
} }
body body
{ {
@ -1369,7 +1432,7 @@ private:
void lexNormalDelimitedString(ubyte open, ubyte close) void lexNormalDelimitedString(ubyte open, ubyte close)
in in
{ {
assert(currentElement() == '"'); assert(buffer[0 .. 2] == `q"`);
} }
body body
{ {