mirror of
https://github.com/ldc-developers/ldc.git
synced 2025-05-07 11:26:02 +03:00

Notably, the glue layer side of the changed multiple interface inheritance layout (DMD a54e89d) has not been implemented yet. This corresponds to DMD commit 3f6a763c0589dd03c1c206eafd434b593702564e.
2516 lines
72 KiB
D
2516 lines
72 KiB
D
// Compiler implementation of the D programming language
|
|
// Copyright (c) 1999-2015 by Digital Mars
|
|
// All Rights Reserved
|
|
// written by Walter Bright
|
|
// http://www.digitalmars.com
|
|
// Distributed under the Boost Software License, Version 1.0.
|
|
// http://www.boost.org/LICENSE_1_0.txt
|
|
|
|
module ddmd.lexer;
|
|
|
|
import core.stdc.ctype;
|
|
import core.stdc.errno;
|
|
import core.stdc.stdarg;
|
|
import core.stdc.stdio;
|
|
import core.stdc.string;
|
|
import core.stdc.time;
|
|
|
|
import ddmd.entity;
|
|
import ddmd.errors;
|
|
import ddmd.globals;
|
|
import ddmd.id;
|
|
import ddmd.identifier;
|
|
import ddmd.root.longdouble;
|
|
import ddmd.root.outbuffer;
|
|
import ddmd.root.port;
|
|
import ddmd.root.rmem;
|
|
import ddmd.root.stringtable;
|
|
import ddmd.tokens;
|
|
import ddmd.utf;
|
|
|
|
enum LS = 0x2028; // UTF line separator
|
|
enum PS = 0x2029; // UTF paragraph separator
|
|
|
|
/********************************************
|
|
* Do our own char maps
|
|
*/
|
|
immutable ubyte[256] cmtable;
|
|
|
|
enum CMoctal = 0x1;
|
|
enum CMhex = 0x2;
|
|
enum CMidchar = 0x4;
|
|
enum CMzerosecond = 0x8;
|
|
enum CMdigitsecond = 0x10;
|
|
enum CMsinglechar = 0x20;
|
|
|
|
bool isoctal(char c)
|
|
{
|
|
return (cmtable[c] & CMoctal) != 0;
|
|
}
|
|
|
|
bool ishex(char c)
|
|
{
|
|
return (cmtable[c] & CMhex) != 0;
|
|
}
|
|
|
|
bool isidchar(char c)
|
|
{
|
|
return (cmtable[c] & CMidchar) != 0;
|
|
}
|
|
|
|
bool isZeroSecond(char c)
|
|
{
|
|
return (cmtable[c] & CMzerosecond) != 0;
|
|
}
|
|
|
|
bool isDigitSecond(char c)
|
|
{
|
|
return (cmtable[c] & CMdigitsecond) != 0;
|
|
}
|
|
|
|
bool issinglechar(char c)
|
|
{
|
|
return (cmtable[c] & CMsinglechar) != 0;
|
|
}
|
|
|
|
static this()
|
|
{
|
|
foreach (const c; 0 .. cmtable.length)
|
|
{
|
|
if ('0' <= c && c <= '7')
|
|
cmtable[c] |= CMoctal;
|
|
if (isxdigit(c))
|
|
cmtable[c] |= CMhex;
|
|
if (isalnum(c) || c == '_')
|
|
cmtable[c] |= CMidchar;
|
|
|
|
switch (c)
|
|
{
|
|
case 'x': case 'X':
|
|
case 'b': case 'B':
|
|
cmtable[c] |= CMzerosecond;
|
|
break;
|
|
|
|
case '0': .. case '9':
|
|
case 'e': case 'E':
|
|
case 'f': case 'F':
|
|
case 'l': case 'L':
|
|
case 'p': case 'P':
|
|
case 'u': case 'U':
|
|
case 'i':
|
|
case '.':
|
|
case '_':
|
|
cmtable[c] |= CMzerosecond | CMdigitsecond;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
switch (c)
|
|
{
|
|
case '\\':
|
|
case '\n':
|
|
case '\r':
|
|
case 0:
|
|
case 0x1A:
|
|
case '\'':
|
|
break;
|
|
default:
|
|
if (!(c & 0x80))
|
|
cmtable[c] |= CMsinglechar;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
unittest
|
|
{
|
|
//printf("lexer.unittest\n");
|
|
/* Not much here, just trying things out.
|
|
*/
|
|
string text = "int";
|
|
scope Lexer lex1 = new Lexer(null, text.ptr, 0, text.length, 0, 0);
|
|
TOK tok;
|
|
tok = lex1.nextToken();
|
|
//printf("tok == %s, %d, %d\n", Token::toChars(tok), tok, TOKint32);
|
|
assert(tok == TOKint32);
|
|
tok = lex1.nextToken();
|
|
assert(tok == TOKeof);
|
|
tok = lex1.nextToken();
|
|
assert(tok == TOKeof);
|
|
}
|
|
|
|
/***********************************************************
|
|
*/
|
|
class Lexer
|
|
{
|
|
public:
|
|
__gshared OutBuffer stringbuffer;
|
|
|
|
Loc scanloc; // for error messages
|
|
|
|
const(char)* base; // pointer to start of buffer
|
|
const(char)* end; // past end of buffer
|
|
const(char)* p; // current character
|
|
const(char)* line; // start of current line
|
|
Token token;
|
|
bool doDocComment; // collect doc comment information
|
|
bool anyToken; // seen at least one token
|
|
bool commentToken; // comments are TOKcomment's
|
|
bool errors; // errors occurred during lexing or parsing
|
|
|
|
/*********************
|
|
* Creates a Lexer.
|
|
* Params:
|
|
* filename = used for error messages
|
|
* base = source code, ending in a 0 byte
|
|
* begoffset = starting offset into base[]
|
|
* endoffset = last offset into base[]
|
|
* doDocComment = handle documentation comments
|
|
* commentToken = comments become TOKcomment's
|
|
*/
|
|
this(const(char)* filename, const(char)* base, size_t begoffset, size_t endoffset, bool doDocComment, bool commentToken)
|
|
{
|
|
scanloc = Loc(filename, 1, 1);
|
|
//printf("Lexer::Lexer(%p,%d)\n",base,length);
|
|
//printf("lexer.filename = %s\n", filename);
|
|
token = Token.init;
|
|
this.base = base;
|
|
this.end = base + endoffset;
|
|
p = base + begoffset;
|
|
line = p;
|
|
this.doDocComment = doDocComment;
|
|
this.commentToken = commentToken;
|
|
//initKeywords();
|
|
/* If first line starts with '#!', ignore the line
|
|
*/
|
|
if (p[0] == '#' && p[1] == '!')
|
|
{
|
|
p += 2;
|
|
while (1)
|
|
{
|
|
char c = *p;
|
|
switch (c)
|
|
{
|
|
case '\n':
|
|
p++;
|
|
break;
|
|
case '\r':
|
|
p++;
|
|
if (*p == '\n')
|
|
p++;
|
|
break;
|
|
case 0:
|
|
case 0x1A:
|
|
break;
|
|
default:
|
|
if (c & 0x80)
|
|
{
|
|
uint u = decodeUTF();
|
|
if (u == PS || u == LS)
|
|
break;
|
|
}
|
|
p++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
endOfLine();
|
|
}
|
|
}
|
|
|
|
final TOK nextToken()
|
|
{
|
|
if (token.next)
|
|
{
|
|
Token* t = token.next;
|
|
memcpy(&token, t, Token.sizeof);
|
|
t.free();
|
|
}
|
|
else
|
|
{
|
|
scan(&token);
|
|
}
|
|
//token.print();
|
|
return token.value;
|
|
}
|
|
|
|
/***********************
|
|
* Look ahead at next token's value.
|
|
*/
|
|
final TOK peekNext()
|
|
{
|
|
return peek(&token).value;
|
|
}
|
|
|
|
/***********************
|
|
* Look 2 tokens ahead at value.
|
|
*/
|
|
final TOK peekNext2()
|
|
{
|
|
Token* t = peek(&token);
|
|
return peek(t).value;
|
|
}
|
|
|
|
/****************************
|
|
* Turn next token in buffer into a token.
|
|
*/
|
|
final void scan(Token* t)
|
|
{
|
|
const lastLine = scanloc.linnum;
|
|
Loc startLoc;
|
|
t.blockComment = null;
|
|
t.lineComment = null;
|
|
while (1)
|
|
{
|
|
t.ptr = p;
|
|
//printf("p = %p, *p = '%c'\n",p,*p);
|
|
t.loc = loc();
|
|
switch (*p)
|
|
{
|
|
case 0:
|
|
case 0x1A:
|
|
t.value = TOKeof; // end of file
|
|
return;
|
|
case ' ':
|
|
case '\t':
|
|
case '\v':
|
|
case '\f':
|
|
p++;
|
|
continue;
|
|
// skip white space
|
|
case '\r':
|
|
p++;
|
|
if (*p != '\n') // if CR stands by itself
|
|
endOfLine();
|
|
continue;
|
|
// skip white space
|
|
case '\n':
|
|
p++;
|
|
endOfLine();
|
|
continue;
|
|
// skip white space
|
|
case '0':
|
|
if (!isZeroSecond(p[1])) // if numeric literal does not continue
|
|
{
|
|
++p;
|
|
t.uns64value = 0;
|
|
t.value = TOKint32v;
|
|
return;
|
|
}
|
|
goto Lnumber;
|
|
|
|
case '1': .. case '9':
|
|
if (!isDigitSecond(p[1])) // if numeric literal does not continue
|
|
{
|
|
t.uns64value = *p - '0';
|
|
++p;
|
|
t.value = TOKint32v;
|
|
return;
|
|
}
|
|
Lnumber:
|
|
t.value = number(t);
|
|
return;
|
|
|
|
case '\'':
|
|
if (issinglechar(p[1]) && p[2] == '\'')
|
|
{
|
|
t.uns64value = p[1]; // simple one character literal
|
|
t.value = TOKcharv;
|
|
p += 3;
|
|
}
|
|
else
|
|
t.value = charConstant(t);
|
|
return;
|
|
case 'r':
|
|
if (p[1] != '"')
|
|
goto case_ident;
|
|
p++;
|
|
goto case '`';
|
|
case '`':
|
|
t.value = wysiwygStringConstant(t, *p);
|
|
return;
|
|
case 'x':
|
|
if (p[1] != '"')
|
|
goto case_ident;
|
|
p++;
|
|
t.value = hexStringConstant(t);
|
|
return;
|
|
case 'q':
|
|
if (p[1] == '"')
|
|
{
|
|
p++;
|
|
t.value = delimitedStringConstant(t);
|
|
return;
|
|
}
|
|
else if (p[1] == '{')
|
|
{
|
|
p++;
|
|
t.value = tokenStringConstant(t);
|
|
return;
|
|
}
|
|
else
|
|
goto case_ident;
|
|
case '"':
|
|
t.value = escapeStringConstant(t, 0);
|
|
return;
|
|
case 'a':
|
|
case 'b':
|
|
case 'c':
|
|
case 'd':
|
|
case 'e':
|
|
case 'f':
|
|
case 'g':
|
|
case 'h':
|
|
case 'i':
|
|
case 'j':
|
|
case 'k':
|
|
case 'l':
|
|
case 'm':
|
|
case 'n':
|
|
case 'o':
|
|
case 'p':
|
|
/*case 'q': case 'r':*/
|
|
case 's':
|
|
case 't':
|
|
case 'u':
|
|
case 'v':
|
|
case 'w':
|
|
/*case 'x':*/
|
|
case 'y':
|
|
case 'z':
|
|
case 'A':
|
|
case 'B':
|
|
case 'C':
|
|
case 'D':
|
|
case 'E':
|
|
case 'F':
|
|
case 'G':
|
|
case 'H':
|
|
case 'I':
|
|
case 'J':
|
|
case 'K':
|
|
case 'L':
|
|
case 'M':
|
|
case 'N':
|
|
case 'O':
|
|
case 'P':
|
|
case 'Q':
|
|
case 'R':
|
|
case 'S':
|
|
case 'T':
|
|
case 'U':
|
|
case 'V':
|
|
case 'W':
|
|
case 'X':
|
|
case 'Y':
|
|
case 'Z':
|
|
case '_':
|
|
case_ident:
|
|
{
|
|
while (1)
|
|
{
|
|
const c = *++p;
|
|
if (isidchar(c))
|
|
continue;
|
|
else if (c & 0x80)
|
|
{
|
|
const s = p;
|
|
const u = decodeUTF();
|
|
if (isUniAlpha(u))
|
|
continue;
|
|
error("char 0x%04x not allowed in identifier", u);
|
|
p = s;
|
|
}
|
|
break;
|
|
}
|
|
Identifier id = Identifier.idPool(cast(char*)t.ptr, p - t.ptr);
|
|
t.ident = id;
|
|
t.value = cast(TOK)id.value;
|
|
anyToken = 1;
|
|
if (*t.ptr == '_') // if special identifier token
|
|
{
|
|
__gshared bool initdone = false;
|
|
__gshared char[11 + 1] date;
|
|
__gshared char[8 + 1] time;
|
|
__gshared char[24 + 1] timestamp;
|
|
if (!initdone) // lazy evaluation
|
|
{
|
|
initdone = true;
|
|
time_t ct;
|
|
.time(&ct);
|
|
const p = ctime(&ct);
|
|
assert(p);
|
|
sprintf(&date[0], "%.6s %.4s", p + 4, p + 20);
|
|
sprintf(&time[0], "%.8s", p + 11);
|
|
sprintf(×tamp[0], "%.24s", p);
|
|
}
|
|
if (id == Id.DATE)
|
|
{
|
|
t.ustring = date.ptr;
|
|
goto Lstr;
|
|
}
|
|
else if (id == Id.TIME)
|
|
{
|
|
t.ustring = time.ptr;
|
|
goto Lstr;
|
|
}
|
|
else if (id == Id.VENDOR)
|
|
{
|
|
t.ustring = global.compiler.vendor;
|
|
goto Lstr;
|
|
}
|
|
else if (id == Id.TIMESTAMP)
|
|
{
|
|
t.ustring = timestamp.ptr;
|
|
Lstr:
|
|
t.value = TOKstring;
|
|
t.postfix = 0;
|
|
t.len = cast(uint)strlen(t.ustring);
|
|
}
|
|
else if (id == Id.VERSIONX)
|
|
{
|
|
uint major = 0;
|
|
uint minor = 0;
|
|
bool point = false;
|
|
for (const(char)* p = global._version + 1; 1; p++)
|
|
{
|
|
const c = *p;
|
|
if (isdigit(cast(char)c))
|
|
minor = minor * 10 + c - '0';
|
|
else if (c == '.')
|
|
{
|
|
if (point)
|
|
break;
|
|
// ignore everything after second '.'
|
|
point = true;
|
|
major = minor;
|
|
minor = 0;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
t.value = TOKint64v;
|
|
t.uns64value = major * 1000 + minor;
|
|
}
|
|
else if (id == Id.EOFX)
|
|
{
|
|
t.value = TOKeof;
|
|
// Advance scanner to end of file
|
|
while (!(*p == 0 || *p == 0x1A))
|
|
p++;
|
|
}
|
|
}
|
|
//printf("t->value = %d\n",t->value);
|
|
return;
|
|
}
|
|
case '/':
|
|
p++;
|
|
switch (*p)
|
|
{
|
|
case '=':
|
|
p++;
|
|
t.value = TOKdivass;
|
|
return;
|
|
case '*':
|
|
p++;
|
|
startLoc = loc();
|
|
while (1)
|
|
{
|
|
while (1)
|
|
{
|
|
const c = *p;
|
|
switch (c)
|
|
{
|
|
case '/':
|
|
break;
|
|
case '\n':
|
|
endOfLine();
|
|
p++;
|
|
continue;
|
|
case '\r':
|
|
p++;
|
|
if (*p != '\n')
|
|
endOfLine();
|
|
continue;
|
|
case 0:
|
|
case 0x1A:
|
|
error("unterminated /* */ comment");
|
|
p = end;
|
|
t.loc = loc();
|
|
t.value = TOKeof;
|
|
return;
|
|
default:
|
|
if (c & 0x80)
|
|
{
|
|
const u = decodeUTF();
|
|
if (u == PS || u == LS)
|
|
endOfLine();
|
|
}
|
|
p++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
p++;
|
|
if (p[-2] == '*' && p - 3 != t.ptr)
|
|
break;
|
|
}
|
|
if (commentToken)
|
|
{
|
|
t.loc = startLoc;
|
|
t.value = TOKcomment;
|
|
return;
|
|
}
|
|
else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
|
|
{
|
|
// if /** but not /**/
|
|
getDocComment(t, lastLine == startLoc.linnum);
|
|
}
|
|
continue;
|
|
case '/':
|
|
// do // style comments
|
|
startLoc = loc();
|
|
while (1)
|
|
{
|
|
const c = *++p;
|
|
switch (c)
|
|
{
|
|
case '\n':
|
|
break;
|
|
case '\r':
|
|
if (p[1] == '\n')
|
|
p++;
|
|
break;
|
|
case 0:
|
|
case 0x1A:
|
|
if (commentToken)
|
|
{
|
|
p = end;
|
|
t.loc = startLoc;
|
|
t.value = TOKcomment;
|
|
return;
|
|
}
|
|
if (doDocComment && t.ptr[2] == '/')
|
|
getDocComment(t, lastLine == startLoc.linnum);
|
|
p = end;
|
|
t.loc = loc();
|
|
t.value = TOKeof;
|
|
return;
|
|
default:
|
|
if (c & 0x80)
|
|
{
|
|
const u = decodeUTF();
|
|
if (u == PS || u == LS)
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
if (commentToken)
|
|
{
|
|
p++;
|
|
endOfLine();
|
|
t.loc = startLoc;
|
|
t.value = TOKcomment;
|
|
return;
|
|
}
|
|
if (doDocComment && t.ptr[2] == '/')
|
|
getDocComment(t, lastLine == startLoc.linnum);
|
|
p++;
|
|
endOfLine();
|
|
continue;
|
|
case '+':
|
|
{
|
|
int nest;
|
|
startLoc = loc();
|
|
p++;
|
|
nest = 1;
|
|
while (1)
|
|
{
|
|
char c = *p;
|
|
switch (c)
|
|
{
|
|
case '/':
|
|
p++;
|
|
if (*p == '+')
|
|
{
|
|
p++;
|
|
nest++;
|
|
}
|
|
continue;
|
|
case '+':
|
|
p++;
|
|
if (*p == '/')
|
|
{
|
|
p++;
|
|
if (--nest == 0)
|
|
break;
|
|
}
|
|
continue;
|
|
case '\r':
|
|
p++;
|
|
if (*p != '\n')
|
|
endOfLine();
|
|
continue;
|
|
case '\n':
|
|
endOfLine();
|
|
p++;
|
|
continue;
|
|
case 0:
|
|
case 0x1A:
|
|
error("unterminated /+ +/ comment");
|
|
p = end;
|
|
t.loc = loc();
|
|
t.value = TOKeof;
|
|
return;
|
|
default:
|
|
if (c & 0x80)
|
|
{
|
|
uint u = decodeUTF();
|
|
if (u == PS || u == LS)
|
|
endOfLine();
|
|
}
|
|
p++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
if (commentToken)
|
|
{
|
|
t.loc = startLoc;
|
|
t.value = TOKcomment;
|
|
return;
|
|
}
|
|
if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
|
|
{
|
|
// if /++ but not /++/
|
|
getDocComment(t, lastLine == startLoc.linnum);
|
|
}
|
|
continue;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
t.value = TOKdiv;
|
|
return;
|
|
case '.':
|
|
p++;
|
|
if (isdigit(*p))
|
|
{
|
|
/* Note that we don't allow ._1 and ._ as being
|
|
* valid floating point numbers.
|
|
*/
|
|
p--;
|
|
t.value = inreal(t);
|
|
}
|
|
else if (p[0] == '.')
|
|
{
|
|
if (p[1] == '.')
|
|
{
|
|
p += 2;
|
|
t.value = TOKdotdotdot;
|
|
}
|
|
else
|
|
{
|
|
p++;
|
|
t.value = TOKslice;
|
|
}
|
|
}
|
|
else
|
|
t.value = TOKdot;
|
|
return;
|
|
case '&':
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKandass;
|
|
}
|
|
else if (*p == '&')
|
|
{
|
|
p++;
|
|
t.value = TOKandand;
|
|
}
|
|
else
|
|
t.value = TOKand;
|
|
return;
|
|
case '|':
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKorass;
|
|
}
|
|
else if (*p == '|')
|
|
{
|
|
p++;
|
|
t.value = TOKoror;
|
|
}
|
|
else
|
|
t.value = TOKor;
|
|
return;
|
|
case '-':
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKminass;
|
|
}
|
|
else if (*p == '-')
|
|
{
|
|
p++;
|
|
t.value = TOKminusminus;
|
|
}
|
|
else
|
|
t.value = TOKmin;
|
|
return;
|
|
case '+':
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKaddass;
|
|
}
|
|
else if (*p == '+')
|
|
{
|
|
p++;
|
|
t.value = TOKplusplus;
|
|
}
|
|
else
|
|
t.value = TOKadd;
|
|
return;
|
|
case '<':
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKle; // <=
|
|
}
|
|
else if (*p == '<')
|
|
{
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKshlass; // <<=
|
|
}
|
|
else
|
|
t.value = TOKshl; // <<
|
|
}
|
|
else if (*p == '>')
|
|
{
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKleg; // <>=
|
|
}
|
|
else
|
|
t.value = TOKlg; // <>
|
|
}
|
|
else
|
|
t.value = TOKlt; // <
|
|
return;
|
|
case '>':
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKge; // >=
|
|
}
|
|
else if (*p == '>')
|
|
{
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKshrass; // >>=
|
|
}
|
|
else if (*p == '>')
|
|
{
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKushrass; // >>>=
|
|
}
|
|
else
|
|
t.value = TOKushr; // >>>
|
|
}
|
|
else
|
|
t.value = TOKshr; // >>
|
|
}
|
|
else
|
|
t.value = TOKgt; // >
|
|
return;
|
|
case '!':
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKnotequal; // !=
|
|
}
|
|
else if (*p == '<')
|
|
{
|
|
p++;
|
|
if (*p == '>')
|
|
{
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKunord; // !<>=
|
|
}
|
|
else
|
|
t.value = TOKue; // !<>
|
|
}
|
|
else if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKug; // !<=
|
|
}
|
|
else
|
|
t.value = TOKuge; // !<
|
|
}
|
|
else if (*p == '>')
|
|
{
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKul; // !>=
|
|
}
|
|
else
|
|
t.value = TOKule; // !>
|
|
}
|
|
else
|
|
t.value = TOKnot; // !
|
|
return;
|
|
case '=':
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKequal; // ==
|
|
}
|
|
else if (*p == '>')
|
|
{
|
|
p++;
|
|
t.value = TOKgoesto; // =>
|
|
}
|
|
else
|
|
t.value = TOKassign; // =
|
|
return;
|
|
case '~':
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKcatass; // ~=
|
|
}
|
|
else
|
|
t.value = TOKtilde; // ~
|
|
return;
|
|
case '^':
|
|
p++;
|
|
if (*p == '^')
|
|
{
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKpowass; // ^^=
|
|
}
|
|
else
|
|
t.value = TOKpow; // ^^
|
|
}
|
|
else if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKxorass; // ^=
|
|
}
|
|
else
|
|
t.value = TOKxor; // ^
|
|
return;
|
|
case '(':
|
|
p++;
|
|
t.value = TOKlparen;
|
|
return;
|
|
case ')':
|
|
p++;
|
|
t.value = TOKrparen;
|
|
return;
|
|
case '[':
|
|
p++;
|
|
t.value = TOKlbracket;
|
|
return;
|
|
case ']':
|
|
p++;
|
|
t.value = TOKrbracket;
|
|
return;
|
|
case '{':
|
|
p++;
|
|
t.value = TOKlcurly;
|
|
return;
|
|
case '}':
|
|
p++;
|
|
t.value = TOKrcurly;
|
|
return;
|
|
case '?':
|
|
p++;
|
|
t.value = TOKquestion;
|
|
return;
|
|
case ',':
|
|
p++;
|
|
t.value = TOKcomma;
|
|
return;
|
|
case ';':
|
|
p++;
|
|
t.value = TOKsemicolon;
|
|
return;
|
|
case ':':
|
|
p++;
|
|
t.value = TOKcolon;
|
|
return;
|
|
case '$':
|
|
p++;
|
|
t.value = TOKdollar;
|
|
return;
|
|
case '@':
|
|
p++;
|
|
t.value = TOKat;
|
|
return;
|
|
case '*':
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKmulass;
|
|
}
|
|
else
|
|
t.value = TOKmul;
|
|
return;
|
|
case '%':
|
|
p++;
|
|
if (*p == '=')
|
|
{
|
|
p++;
|
|
t.value = TOKmodass;
|
|
}
|
|
else
|
|
t.value = TOKmod;
|
|
return;
|
|
case '#':
|
|
{
|
|
p++;
|
|
Token n;
|
|
scan(&n);
|
|
if (n.value == TOKidentifier && n.ident == Id.line)
|
|
{
|
|
poundLine();
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
t.value = TOKpound;
|
|
return;
|
|
}
|
|
}
|
|
default:
|
|
{
|
|
dchar c = *p;
|
|
if (c & 0x80)
|
|
{
|
|
c = decodeUTF();
|
|
// Check for start of unicode identifier
|
|
if (isUniAlpha(c))
|
|
goto case_ident;
|
|
if (c == PS || c == LS)
|
|
{
|
|
endOfLine();
|
|
p++;
|
|
continue;
|
|
}
|
|
}
|
|
if (c < 0x80 && isprint(c))
|
|
error("character '%c' is not a valid token", c);
|
|
else
|
|
error("character 0x%02x is not a valid token", c);
|
|
p++;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
final Token* peek(Token* ct)
|
|
{
|
|
Token* t;
|
|
if (ct.next)
|
|
t = ct.next;
|
|
else
|
|
{
|
|
t = Token.alloc();
|
|
scan(t);
|
|
ct.next = t;
|
|
}
|
|
return t;
|
|
}
|
|
|
|
/*********************************
|
|
* tk is on the opening (.
|
|
* Look ahead and return token that is past the closing ).
|
|
*/
|
|
final Token* peekPastParen(Token* tk)
|
|
{
|
|
//printf("peekPastParen()\n");
|
|
int parens = 1;
|
|
int curlynest = 0;
|
|
while (1)
|
|
{
|
|
tk = peek(tk);
|
|
//tk->print();
|
|
switch (tk.value)
|
|
{
|
|
case TOKlparen:
|
|
parens++;
|
|
continue;
|
|
case TOKrparen:
|
|
--parens;
|
|
if (parens)
|
|
continue;
|
|
tk = peek(tk);
|
|
break;
|
|
case TOKlcurly:
|
|
curlynest++;
|
|
continue;
|
|
case TOKrcurly:
|
|
if (--curlynest >= 0)
|
|
continue;
|
|
break;
|
|
case TOKsemicolon:
|
|
if (curlynest)
|
|
continue;
|
|
break;
|
|
case TOKeof:
|
|
break;
|
|
default:
|
|
continue;
|
|
}
|
|
return tk;
|
|
}
|
|
}
|
|
|
|
/*******************************************
|
|
* Parse escape sequence.
|
|
*/
|
|
final uint escapeSequence()
|
|
{
|
|
uint c = *p;
|
|
int ndigits;
|
|
switch (c)
|
|
{
|
|
case '\'':
|
|
case '"':
|
|
case '?':
|
|
case '\\':
|
|
Lconsume:
|
|
p++;
|
|
break;
|
|
case 'a':
|
|
c = 7;
|
|
goto Lconsume;
|
|
case 'b':
|
|
c = 8;
|
|
goto Lconsume;
|
|
case 'f':
|
|
c = 12;
|
|
goto Lconsume;
|
|
case 'n':
|
|
c = 10;
|
|
goto Lconsume;
|
|
case 'r':
|
|
c = 13;
|
|
goto Lconsume;
|
|
case 't':
|
|
c = 9;
|
|
goto Lconsume;
|
|
case 'v':
|
|
c = 11;
|
|
goto Lconsume;
|
|
case 'u':
|
|
ndigits = 4;
|
|
goto Lhex;
|
|
case 'U':
|
|
ndigits = 8;
|
|
goto Lhex;
|
|
case 'x':
|
|
ndigits = 2;
|
|
Lhex:
|
|
p++;
|
|
c = *p;
|
|
if (ishex(cast(char)c))
|
|
{
|
|
uint v = 0;
|
|
int n = 0;
|
|
while (1)
|
|
{
|
|
if (isdigit(cast(char)c))
|
|
c -= '0';
|
|
else if (islower(c))
|
|
c -= 'a' - 10;
|
|
else
|
|
c -= 'A' - 10;
|
|
v = v * 16 + c;
|
|
c = *++p;
|
|
if (++n == ndigits)
|
|
break;
|
|
if (!ishex(cast(char)c))
|
|
{
|
|
error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
|
|
break;
|
|
}
|
|
}
|
|
if (ndigits != 2 && !utf_isValidDchar(v))
|
|
{
|
|
error("invalid UTF character \\U%08x", v);
|
|
v = '?'; // recover with valid UTF character
|
|
}
|
|
c = v;
|
|
}
|
|
else
|
|
error("undefined escape hex sequence \\%c", c);
|
|
break;
|
|
case '&':
|
|
// named character entity
|
|
for (const idstart = ++p; 1; p++)
|
|
{
|
|
switch (*p)
|
|
{
|
|
case ';':
|
|
c = HtmlNamedEntity(idstart, p - idstart);
|
|
if (c == ~0)
|
|
{
|
|
error("unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
|
|
c = ' ';
|
|
}
|
|
p++;
|
|
break;
|
|
default:
|
|
if (isalpha(*p) || (p != idstart && isdigit(*p)))
|
|
continue;
|
|
error("unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
case 0:
|
|
case 0x1A:
|
|
// end of file
|
|
c = '\\';
|
|
break;
|
|
default:
|
|
if (isoctal(cast(char)c))
|
|
{
|
|
uint v = 0;
|
|
int n = 0;
|
|
do
|
|
{
|
|
v = v * 8 + (c - '0');
|
|
c = *++p;
|
|
}
|
|
while (++n < 3 && isoctal(cast(char)c));
|
|
c = v;
|
|
if (c > 0xFF)
|
|
error("escape octal sequence \\%03o is larger than \\377", c);
|
|
}
|
|
else
|
|
error("undefined escape sequence \\%c", c);
|
|
break;
|
|
}
|
|
return c;
|
|
}
|
|
|
|
/**************************************
|
|
*/
|
|
final TOK wysiwygStringConstant(Token* t, int tc)
|
|
{
|
|
Loc start = loc();
|
|
p++;
|
|
stringbuffer.reset();
|
|
while (1)
|
|
{
|
|
dchar c = *p++;
|
|
switch (c)
|
|
{
|
|
case '\n':
|
|
endOfLine();
|
|
break;
|
|
case '\r':
|
|
if (*p == '\n')
|
|
continue;
|
|
// ignore
|
|
c = '\n'; // treat EndOfLine as \n character
|
|
endOfLine();
|
|
break;
|
|
case 0:
|
|
case 0x1A:
|
|
error("unterminated string constant starting at %s", start.toChars());
|
|
t.setString();
|
|
return TOKstring;
|
|
case '"':
|
|
case '`':
|
|
if (c == tc)
|
|
{
|
|
t.setString(stringbuffer);
|
|
stringPostfix(t);
|
|
return TOKstring;
|
|
}
|
|
break;
|
|
default:
|
|
if (c & 0x80)
|
|
{
|
|
p--;
|
|
const u = decodeUTF();
|
|
p++;
|
|
if (u == PS || u == LS)
|
|
endOfLine();
|
|
stringbuffer.writeUTF8(u);
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
stringbuffer.writeByte(c);
|
|
}
|
|
}
|
|
|
|
/**************************************
|
|
* Lex hex strings:
|
|
* x"0A ae 34FE BD"
|
|
*/
|
|
final TOK hexStringConstant(Token* t)
|
|
{
|
|
Loc start = loc();
|
|
uint n = 0;
|
|
uint v = ~0; // dead assignment, needed to suppress warning
|
|
p++;
|
|
stringbuffer.reset();
|
|
while (1)
|
|
{
|
|
dchar c = *p++;
|
|
switch (c)
|
|
{
|
|
case ' ':
|
|
case '\t':
|
|
case '\v':
|
|
case '\f':
|
|
continue; // skip white space
|
|
case '\r':
|
|
if (*p == '\n')
|
|
continue; // ignore '\r' if followed by '\n'
|
|
// Treat isolated '\r' as if it were a '\n'
|
|
goto case '\n';
|
|
case '\n':
|
|
endOfLine();
|
|
continue;
|
|
case 0:
|
|
case 0x1A:
|
|
error("unterminated string constant starting at %s", start.toChars());
|
|
t.setString();
|
|
return TOKxstring;
|
|
case '"':
|
|
if (n & 1)
|
|
{
|
|
error("odd number (%d) of hex characters in hex string", n);
|
|
stringbuffer.writeByte(v);
|
|
}
|
|
t.setString(stringbuffer);
|
|
stringPostfix(t);
|
|
return TOKxstring;
|
|
default:
|
|
if (c >= '0' && c <= '9')
|
|
c -= '0';
|
|
else if (c >= 'a' && c <= 'f')
|
|
c -= 'a' - 10;
|
|
else if (c >= 'A' && c <= 'F')
|
|
c -= 'A' - 10;
|
|
else if (c & 0x80)
|
|
{
|
|
p--;
|
|
const u = decodeUTF();
|
|
p++;
|
|
if (u == PS || u == LS)
|
|
endOfLine();
|
|
else
|
|
error("non-hex character \\u%04x in hex string", u);
|
|
}
|
|
else
|
|
error("non-hex character '%c' in hex string", c);
|
|
if (n & 1)
|
|
{
|
|
v = (v << 4) | c;
|
|
stringbuffer.writeByte(v);
|
|
}
|
|
else
|
|
v = c;
|
|
n++;
|
|
break;
|
|
}
|
|
}
|
|
assert(0); // see bug 15731
|
|
}
|
|
|
|
/**************************************
|
|
* Lex delimited strings:
|
|
* q"(foo(xxx))" // "foo(xxx)"
|
|
* q"[foo(]" // "foo("
|
|
* q"/foo]/" // "foo]"
|
|
* q"HERE
|
|
* foo
|
|
* HERE" // "foo\n"
|
|
* Input:
|
|
* p is on the "
|
|
*/
|
|
final TOK delimitedStringConstant(Token* t)
|
|
{
|
|
Loc start = loc();
|
|
dchar delimleft = 0;
|
|
dchar delimright = 0;
|
|
uint nest = 1;
|
|
uint nestcount = ~0; // dead assignment, needed to suppress warning
|
|
Identifier hereid = null;
|
|
uint blankrol = 0;
|
|
uint startline = 0;
|
|
p++;
|
|
stringbuffer.reset();
|
|
while (1)
|
|
{
|
|
dchar c = *p++;
|
|
//printf("c = '%c'\n", c);
|
|
switch (c)
|
|
{
|
|
case '\n':
|
|
Lnextline:
|
|
endOfLine();
|
|
startline = 1;
|
|
if (blankrol)
|
|
{
|
|
blankrol = 0;
|
|
continue;
|
|
}
|
|
if (hereid)
|
|
{
|
|
stringbuffer.writeUTF8(c);
|
|
continue;
|
|
}
|
|
break;
|
|
case '\r':
|
|
if (*p == '\n')
|
|
continue;
|
|
// ignore
|
|
c = '\n'; // treat EndOfLine as \n character
|
|
goto Lnextline;
|
|
case 0:
|
|
case 0x1A:
|
|
error("unterminated delimited string constant starting at %s", start.toChars());
|
|
t.setString();
|
|
return TOKstring;
|
|
default:
|
|
if (c & 0x80)
|
|
{
|
|
p--;
|
|
c = decodeUTF();
|
|
p++;
|
|
if (c == PS || c == LS)
|
|
goto Lnextline;
|
|
}
|
|
break;
|
|
}
|
|
if (delimleft == 0)
|
|
{
|
|
delimleft = c;
|
|
nest = 1;
|
|
nestcount = 1;
|
|
if (c == '(')
|
|
delimright = ')';
|
|
else if (c == '{')
|
|
delimright = '}';
|
|
else if (c == '[')
|
|
delimright = ']';
|
|
else if (c == '<')
|
|
delimright = '>';
|
|
else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
|
|
{
|
|
// Start of identifier; must be a heredoc
|
|
Token tok;
|
|
p--;
|
|
scan(&tok); // read in heredoc identifier
|
|
if (tok.value != TOKidentifier)
|
|
{
|
|
error("identifier expected for heredoc, not %s", tok.toChars());
|
|
delimright = c;
|
|
}
|
|
else
|
|
{
|
|
hereid = tok.ident;
|
|
//printf("hereid = '%s'\n", hereid->toChars());
|
|
blankrol = 1;
|
|
}
|
|
nest = 0;
|
|
}
|
|
else
|
|
{
|
|
delimright = c;
|
|
nest = 0;
|
|
if (isspace(c))
|
|
error("delimiter cannot be whitespace");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (blankrol)
|
|
{
|
|
error("heredoc rest of line should be blank");
|
|
blankrol = 0;
|
|
continue;
|
|
}
|
|
if (nest == 1)
|
|
{
|
|
if (c == delimleft)
|
|
nestcount++;
|
|
else if (c == delimright)
|
|
{
|
|
nestcount--;
|
|
if (nestcount == 0)
|
|
goto Ldone;
|
|
}
|
|
}
|
|
else if (c == delimright)
|
|
goto Ldone;
|
|
if (startline && isalpha(c) && hereid)
|
|
{
|
|
Token tok;
|
|
auto psave = p;
|
|
p--;
|
|
scan(&tok); // read in possible heredoc identifier
|
|
//printf("endid = '%s'\n", tok.ident->toChars());
|
|
if (tok.value == TOKidentifier && tok.ident.equals(hereid))
|
|
{
|
|
/* should check that rest of line is blank
|
|
*/
|
|
goto Ldone;
|
|
}
|
|
p = psave;
|
|
}
|
|
stringbuffer.writeUTF8(c);
|
|
startline = 0;
|
|
}
|
|
}
|
|
Ldone:
|
|
if (*p == '"')
|
|
p++;
|
|
else if (hereid)
|
|
error("delimited string must end in %s\"", hereid.toChars());
|
|
else
|
|
error("delimited string must end in %c\"", delimright);
|
|
t.setString(stringbuffer);
|
|
stringPostfix(t);
|
|
return TOKstring;
|
|
}
|
|
|
|
/**************************************
|
|
* Lex delimited strings:
|
|
* q{ foo(xxx) } // " foo(xxx) "
|
|
* q{foo(} // "foo("
|
|
* q{{foo}"}"} // "{foo}"}""
|
|
* Input:
|
|
* p is on the q
|
|
*/
|
|
final TOK tokenStringConstant(Token* t)
|
|
{
|
|
uint nest = 1;
|
|
const start = loc();
|
|
const pstart = ++p;
|
|
while (1)
|
|
{
|
|
Token tok;
|
|
scan(&tok);
|
|
switch (tok.value)
|
|
{
|
|
case TOKlcurly:
|
|
nest++;
|
|
continue;
|
|
case TOKrcurly:
|
|
if (--nest == 0)
|
|
{
|
|
t.setString(pstart, p - 1 - pstart);
|
|
stringPostfix(t);
|
|
return TOKstring;
|
|
}
|
|
continue;
|
|
case TOKeof:
|
|
error("unterminated token string constant starting at %s", start.toChars());
|
|
t.setString();
|
|
return TOKstring;
|
|
default:
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**************************************
|
|
*/
|
|
final TOK escapeStringConstant(Token* t, int wide)
|
|
{
|
|
const start = loc();
|
|
p++;
|
|
stringbuffer.reset();
|
|
while (1)
|
|
{
|
|
dchar c = *p++;
|
|
switch (c)
|
|
{
|
|
case '\\':
|
|
switch (*p)
|
|
{
|
|
case 'u':
|
|
case 'U':
|
|
case '&':
|
|
c = escapeSequence();
|
|
stringbuffer.writeUTF8(c);
|
|
continue;
|
|
default:
|
|
c = escapeSequence();
|
|
break;
|
|
}
|
|
break;
|
|
case '\n':
|
|
endOfLine();
|
|
break;
|
|
case '\r':
|
|
if (*p == '\n')
|
|
continue;
|
|
// ignore
|
|
c = '\n'; // treat EndOfLine as \n character
|
|
endOfLine();
|
|
break;
|
|
case '"':
|
|
t.setString(stringbuffer);
|
|
stringPostfix(t);
|
|
return TOKstring;
|
|
case 0:
|
|
case 0x1A:
|
|
p--;
|
|
error("unterminated string constant starting at %s", start.toChars());
|
|
t.setString();
|
|
return TOKstring;
|
|
default:
|
|
if (c & 0x80)
|
|
{
|
|
p--;
|
|
c = decodeUTF();
|
|
if (c == LS || c == PS)
|
|
{
|
|
c = '\n';
|
|
endOfLine();
|
|
}
|
|
p++;
|
|
stringbuffer.writeUTF8(c);
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
stringbuffer.writeByte(c);
|
|
}
|
|
}
|
|
|
|
/**************************************
|
|
*/
|
|
final TOK charConstant(Token* t)
|
|
{
|
|
auto tk = TOKcharv;
|
|
//printf("Lexer::charConstant\n");
|
|
p++;
|
|
dchar c = *p++;
|
|
switch (c)
|
|
{
|
|
case '\\':
|
|
switch (*p)
|
|
{
|
|
case 'u':
|
|
t.uns64value = escapeSequence();
|
|
tk = TOKwcharv;
|
|
break;
|
|
case 'U':
|
|
case '&':
|
|
t.uns64value = escapeSequence();
|
|
tk = TOKdcharv;
|
|
break;
|
|
default:
|
|
t.uns64value = escapeSequence();
|
|
break;
|
|
}
|
|
break;
|
|
case '\n':
|
|
L1:
|
|
endOfLine();
|
|
goto case;
|
|
case '\r':
|
|
case 0:
|
|
case 0x1A:
|
|
case '\'':
|
|
error("unterminated character constant");
|
|
t.uns64value = '?';
|
|
return tk;
|
|
default:
|
|
if (c & 0x80)
|
|
{
|
|
p--;
|
|
c = decodeUTF();
|
|
p++;
|
|
if (c == LS || c == PS)
|
|
goto L1;
|
|
if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
|
|
tk = TOKwcharv;
|
|
else
|
|
tk = TOKdcharv;
|
|
}
|
|
t.uns64value = c;
|
|
break;
|
|
}
|
|
if (*p != '\'')
|
|
{
|
|
error("unterminated character constant");
|
|
t.uns64value = '?';
|
|
return tk;
|
|
}
|
|
p++;
|
|
return tk;
|
|
}
|
|
|
|
/***************************************
|
|
* Get postfix of string literal.
|
|
*/
|
|
final void stringPostfix(Token* t)
|
|
{
|
|
switch (*p)
|
|
{
|
|
case 'c':
|
|
case 'w':
|
|
case 'd':
|
|
t.postfix = *p;
|
|
p++;
|
|
break;
|
|
default:
|
|
t.postfix = 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/**************************************
|
|
* Read in a number.
|
|
* If it's an integer, store it in tok.TKutok.Vlong.
|
|
* integers can be decimal, octal or hex
|
|
* Handle the suffixes U, UL, LU, L, etc.
|
|
* If it's double, store it in tok.TKutok.Vdouble.
|
|
* Returns:
|
|
* TKnum
|
|
* TKdouble,...
|
|
*/
|
|
final TOK number(Token* t)
|
|
{
|
|
int base = 10;
|
|
const start = p;
|
|
uinteger_t n = 0; // unsigned >=64 bit integer type
|
|
int d;
|
|
bool err = false;
|
|
bool overflow = false;
|
|
dchar c = *p;
|
|
if (c == '0')
|
|
{
|
|
++p;
|
|
c = *p;
|
|
switch (c)
|
|
{
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
n = c - '0';
|
|
++p;
|
|
base = 8;
|
|
break;
|
|
case 'x':
|
|
case 'X':
|
|
++p;
|
|
base = 16;
|
|
break;
|
|
case 'b':
|
|
case 'B':
|
|
++p;
|
|
base = 2;
|
|
break;
|
|
case '.':
|
|
if (p[1] == '.')
|
|
goto Ldone;
|
|
// if ".."
|
|
if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
|
|
goto Ldone;
|
|
// if ".identifier" or ".unicode"
|
|
goto Lreal;
|
|
// '.' is part of current token
|
|
case 'i':
|
|
case 'f':
|
|
case 'F':
|
|
goto Lreal;
|
|
case '_':
|
|
++p;
|
|
base = 8;
|
|
break;
|
|
case 'L':
|
|
if (p[1] == 'i')
|
|
goto Lreal;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
while (1)
|
|
{
|
|
c = *p;
|
|
switch (c)
|
|
{
|
|
case '0':
|
|
case '1':
|
|
++p;
|
|
d = c - '0';
|
|
break;
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
if (base == 2 && !err)
|
|
{
|
|
error("binary digit expected");
|
|
err = true;
|
|
}
|
|
++p;
|
|
d = c - '0';
|
|
break;
|
|
case '8':
|
|
case '9':
|
|
++p;
|
|
if (base < 10 && !err)
|
|
{
|
|
error("radix %d digit expected, not '%c'", base, c);
|
|
err = true;
|
|
}
|
|
d = c - '0';
|
|
break;
|
|
case 'a':
|
|
case 'b':
|
|
case 'c':
|
|
case 'd':
|
|
case 'e':
|
|
case 'f':
|
|
case 'A':
|
|
case 'B':
|
|
case 'C':
|
|
case 'D':
|
|
case 'E':
|
|
case 'F':
|
|
++p;
|
|
if (base != 16)
|
|
{
|
|
if (c == 'e' || c == 'E' || c == 'f' || c == 'F')
|
|
goto Lreal;
|
|
if (!err)
|
|
{
|
|
error("radix %d digit expected, not '%c'", base, c);
|
|
err = true;
|
|
}
|
|
}
|
|
if (c >= 'a')
|
|
d = c + 10 - 'a';
|
|
else
|
|
d = c + 10 - 'A';
|
|
break;
|
|
case 'L':
|
|
if (p[1] == 'i')
|
|
goto Lreal;
|
|
goto Ldone;
|
|
case '.':
|
|
if (p[1] == '.')
|
|
goto Ldone;
|
|
// if ".."
|
|
if (base == 10 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
|
|
goto Ldone;
|
|
// if ".identifier" or ".unicode"
|
|
goto Lreal;
|
|
// otherwise as part of a floating point literal
|
|
case 'p':
|
|
case 'P':
|
|
case 'i':
|
|
Lreal:
|
|
p = start;
|
|
return inreal(t);
|
|
case '_':
|
|
++p;
|
|
continue;
|
|
default:
|
|
goto Ldone;
|
|
}
|
|
// Avoid expensive overflow check if we aren't at risk of overflow
|
|
if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
|
|
n = n * base + d;
|
|
else
|
|
{
|
|
import core.checkedint : mulu, addu;
|
|
|
|
n = mulu(n, base, overflow);
|
|
n = addu(n, d, overflow);
|
|
}
|
|
}
|
|
Ldone:
|
|
if (overflow && !err)
|
|
{
|
|
error("integer overflow");
|
|
err = true;
|
|
}
|
|
enum FLAGS : int
|
|
{
|
|
FLAGS_none = 0,
|
|
FLAGS_decimal = 1, // decimal
|
|
FLAGS_unsigned = 2, // u or U suffix
|
|
FLAGS_long = 4, // L suffix
|
|
}
|
|
|
|
alias FLAGS_none = FLAGS.FLAGS_none;
|
|
alias FLAGS_decimal = FLAGS.FLAGS_decimal;
|
|
alias FLAGS_unsigned = FLAGS.FLAGS_unsigned;
|
|
alias FLAGS_long = FLAGS.FLAGS_long;
|
|
|
|
FLAGS flags = (base == 10) ? FLAGS_decimal : FLAGS_none;
|
|
// Parse trailing 'u', 'U', 'l' or 'L' in any combination
|
|
const psuffix = p;
|
|
while (1)
|
|
{
|
|
FLAGS f;
|
|
switch (*p)
|
|
{
|
|
case 'U':
|
|
case 'u':
|
|
f = FLAGS_unsigned;
|
|
goto L1;
|
|
case 'l':
|
|
f = FLAGS_long;
|
|
error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
|
|
goto L1;
|
|
case 'L':
|
|
f = FLAGS_long;
|
|
L1:
|
|
p++;
|
|
if ((flags & f) && !err)
|
|
{
|
|
error("unrecognized token");
|
|
err = true;
|
|
}
|
|
flags = cast(FLAGS)(flags | f);
|
|
continue;
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
if (base == 8 && n >= 8)
|
|
error("octal literals 0%llo%.*s are no longer supported, use std.conv.octal!%llo%.*s instead", n, p - psuffix, psuffix, n, p - psuffix, psuffix);
|
|
TOK result;
|
|
switch (flags)
|
|
{
|
|
case FLAGS_none:
|
|
/* Octal or Hexadecimal constant.
|
|
* First that fits: int, uint, long, ulong
|
|
*/
|
|
if (n & 0x8000000000000000L)
|
|
result = TOKuns64v;
|
|
else if (n & 0xFFFFFFFF00000000L)
|
|
result = TOKint64v;
|
|
else if (n & 0x80000000)
|
|
result = TOKuns32v;
|
|
else
|
|
result = TOKint32v;
|
|
break;
|
|
case FLAGS_decimal:
|
|
/* First that fits: int, long, long long
|
|
*/
|
|
if (n & 0x8000000000000000L)
|
|
{
|
|
if (!err)
|
|
{
|
|
error("signed integer overflow");
|
|
err = true;
|
|
}
|
|
result = TOKuns64v;
|
|
}
|
|
else if (n & 0xFFFFFFFF80000000L)
|
|
result = TOKint64v;
|
|
else
|
|
result = TOKint32v;
|
|
break;
|
|
case FLAGS_unsigned:
|
|
case FLAGS_decimal | FLAGS_unsigned:
|
|
/* First that fits: uint, ulong
|
|
*/
|
|
if (n & 0xFFFFFFFF00000000L)
|
|
result = TOKuns64v;
|
|
else
|
|
result = TOKuns32v;
|
|
break;
|
|
case FLAGS_decimal | FLAGS_long:
|
|
if (n & 0x8000000000000000L)
|
|
{
|
|
if (!err)
|
|
{
|
|
error("signed integer overflow");
|
|
err = true;
|
|
}
|
|
result = TOKuns64v;
|
|
}
|
|
else
|
|
result = TOKint64v;
|
|
break;
|
|
case FLAGS_long:
|
|
if (n & 0x8000000000000000L)
|
|
result = TOKuns64v;
|
|
else
|
|
result = TOKint64v;
|
|
break;
|
|
case FLAGS_unsigned | FLAGS_long:
|
|
case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
|
|
result = TOKuns64v;
|
|
break;
|
|
default:
|
|
debug
|
|
{
|
|
printf("%x\n", flags);
|
|
}
|
|
assert(0);
|
|
}
|
|
t.uns64value = n;
|
|
return result;
|
|
}
|
|
|
|
/**************************************
|
|
* Read in characters, converting them to real.
|
|
* Bugs:
|
|
* Exponent overflow not detected.
|
|
* Too much requested precision is not detected.
|
|
*/
|
|
final TOK inreal(Token* t)
|
|
{
|
|
//printf("Lexer::inreal()\n");
|
|
debug
|
|
{
|
|
assert(*p == '.' || isdigit(*p));
|
|
}
|
|
stringbuffer.reset();
|
|
auto pstart = p;
|
|
bool hex = false;
|
|
dchar c = *p++;
|
|
// Leading '0x'
|
|
if (c == '0')
|
|
{
|
|
c = *p++;
|
|
if (c == 'x' || c == 'X')
|
|
{
|
|
hex = true;
|
|
c = *p++;
|
|
}
|
|
}
|
|
// Digits to left of '.'
|
|
while (1)
|
|
{
|
|
if (c == '.')
|
|
{
|
|
c = *p++;
|
|
break;
|
|
}
|
|
if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
|
|
{
|
|
c = *p++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
// Digits to right of '.'
|
|
while (1)
|
|
{
|
|
if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
|
|
{
|
|
c = *p++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
|
|
{
|
|
c = *p++;
|
|
if (c == '-' || c == '+')
|
|
{
|
|
c = *p++;
|
|
}
|
|
bool anyexp = false;
|
|
while (1)
|
|
{
|
|
if (isdigit(c))
|
|
{
|
|
anyexp = true;
|
|
c = *p++;
|
|
continue;
|
|
}
|
|
if (c == '_')
|
|
{
|
|
c = *p++;
|
|
continue;
|
|
}
|
|
if (!anyexp)
|
|
error("missing exponent");
|
|
break;
|
|
}
|
|
}
|
|
else if (hex)
|
|
error("exponent required for hex float");
|
|
--p;
|
|
while (pstart < p)
|
|
{
|
|
if (*pstart != '_')
|
|
stringbuffer.writeByte(*pstart);
|
|
++pstart;
|
|
}
|
|
stringbuffer.writeByte(0);
|
|
auto sbufptr = cast(const(char)*)stringbuffer.data;
|
|
TOK result;
|
|
t.float80value = Port.strtold(sbufptr, null);
|
|
errno = 0;
|
|
switch (*p)
|
|
{
|
|
case 'F':
|
|
case 'f':
|
|
// Only interested in errno return
|
|
cast(void)Port.strtof(sbufptr, null);
|
|
result = TOKfloat32v;
|
|
p++;
|
|
break;
|
|
default:
|
|
/* Should do our own strtod(), since dmc and linux gcc
|
|
* accept 2.22507e-308, while apple gcc will only take
|
|
* 2.22508e-308. Not sure who is right.
|
|
*/
|
|
// Only interested in errno return
|
|
cast(void)Port.strtod(sbufptr, null);
|
|
result = TOKfloat64v;
|
|
break;
|
|
case 'l':
|
|
error("use 'L' suffix instead of 'l'");
|
|
goto case 'L';
|
|
case 'L':
|
|
result = TOKfloat80v;
|
|
p++;
|
|
break;
|
|
}
|
|
if (*p == 'i' || *p == 'I')
|
|
{
|
|
if (*p == 'I')
|
|
error("use 'i' suffix instead of 'I'");
|
|
p++;
|
|
switch (result)
|
|
{
|
|
case TOKfloat32v:
|
|
result = TOKimaginary32v;
|
|
break;
|
|
case TOKfloat64v:
|
|
result = TOKimaginary64v;
|
|
break;
|
|
case TOKfloat80v:
|
|
result = TOKimaginary80v;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
if (errno == ERANGE)
|
|
{
|
|
const char* suffix = (result == TOKfloat32v || result == TOKimaginary32v) ? "f" : "";
|
|
error(scanloc, "number '%s%s' is not representable", sbufptr, suffix);
|
|
}
|
|
debug
|
|
{
|
|
switch (result)
|
|
{
|
|
case TOKfloat32v:
|
|
case TOKfloat64v:
|
|
case TOKfloat80v:
|
|
case TOKimaginary32v:
|
|
case TOKimaginary64v:
|
|
case TOKimaginary80v:
|
|
break;
|
|
default:
|
|
assert(0);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
final Loc loc()
|
|
{
|
|
scanloc.charnum = cast(uint)(1 + p - line);
|
|
return scanloc;
|
|
}
|
|
|
|
final void error(const(char)* format, ...)
|
|
{
|
|
va_list ap;
|
|
va_start(ap, format);
|
|
.verror(token.loc, format, ap);
|
|
va_end(ap);
|
|
errors = true;
|
|
}
|
|
|
|
final void error(Loc loc, const(char)* format, ...)
|
|
{
|
|
va_list ap;
|
|
va_start(ap, format);
|
|
.verror(loc, format, ap);
|
|
va_end(ap);
|
|
errors = true;
|
|
}
|
|
|
|
final void deprecation(const(char)* format, ...)
|
|
{
|
|
va_list ap;
|
|
va_start(ap, format);
|
|
.vdeprecation(token.loc, format, ap);
|
|
va_end(ap);
|
|
if (global.params.useDeprecated == 0)
|
|
errors = true;
|
|
}
|
|
|
|
/*********************************************
|
|
* parse:
|
|
* #line linnum [filespec]
|
|
* also allow __LINE__ for linnum, and __FILE__ for filespec
|
|
*/
|
|
final void poundLine()
|
|
{
|
|
auto linnum = this.scanloc.linnum;
|
|
const(char)* filespec = null;
|
|
const loc = this.loc();
|
|
Token tok;
|
|
scan(&tok);
|
|
if (tok.value == TOKint32v || tok.value == TOKint64v)
|
|
{
|
|
const lin = cast(int)(tok.uns64value - 1);
|
|
if (lin != tok.uns64value - 1)
|
|
error("line number %lld out of range", cast(ulong)tok.uns64value);
|
|
else
|
|
linnum = lin;
|
|
}
|
|
else if (tok.value == TOKline)
|
|
{
|
|
}
|
|
else
|
|
goto Lerr;
|
|
while (1)
|
|
{
|
|
switch (*p)
|
|
{
|
|
case 0:
|
|
case 0x1A:
|
|
case '\n':
|
|
Lnewline:
|
|
this.scanloc.linnum = linnum;
|
|
if (filespec)
|
|
this.scanloc.filename = filespec;
|
|
return;
|
|
case '\r':
|
|
p++;
|
|
if (*p != '\n')
|
|
{
|
|
p--;
|
|
goto Lnewline;
|
|
}
|
|
continue;
|
|
case ' ':
|
|
case '\t':
|
|
case '\v':
|
|
case '\f':
|
|
p++;
|
|
continue;
|
|
// skip white space
|
|
case '_':
|
|
if (memcmp(p, "__FILE__".ptr, 8) == 0)
|
|
{
|
|
p += 8;
|
|
filespec = mem.xstrdup(scanloc.filename);
|
|
continue;
|
|
}
|
|
goto Lerr;
|
|
case '"':
|
|
if (filespec)
|
|
goto Lerr;
|
|
stringbuffer.reset();
|
|
p++;
|
|
while (1)
|
|
{
|
|
uint c;
|
|
c = *p;
|
|
switch (c)
|
|
{
|
|
case '\n':
|
|
case '\r':
|
|
case 0:
|
|
case 0x1A:
|
|
goto Lerr;
|
|
case '"':
|
|
stringbuffer.writeByte(0);
|
|
filespec = mem.xstrdup(cast(const(char)*)stringbuffer.data);
|
|
p++;
|
|
break;
|
|
default:
|
|
if (c & 0x80)
|
|
{
|
|
uint u = decodeUTF();
|
|
if (u == PS || u == LS)
|
|
goto Lerr;
|
|
}
|
|
stringbuffer.writeByte(c);
|
|
p++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
continue;
|
|
default:
|
|
if (*p & 0x80)
|
|
{
|
|
uint u = decodeUTF();
|
|
if (u == PS || u == LS)
|
|
goto Lnewline;
|
|
}
|
|
goto Lerr;
|
|
}
|
|
}
|
|
Lerr:
|
|
error(loc, "#line integer [\"filespec\"]\\n expected");
|
|
}
|
|
|
|
/********************************************
|
|
* Decode UTF character.
|
|
* Issue error messages for invalid sequences.
|
|
* Return decoded character, advance p to last character in UTF sequence.
|
|
*/
|
|
final uint decodeUTF()
|
|
{
|
|
const s = p;
|
|
assert(*s & 0x80);
|
|
// Check length of remaining string up to 6 UTF-8 characters
|
|
size_t len;
|
|
for (len = 1; len < 6 && s[len]; len++)
|
|
{
|
|
}
|
|
size_t idx = 0;
|
|
dchar u;
|
|
const msg = utf_decodeChar(s, len, idx, u);
|
|
p += idx - 1;
|
|
if (msg)
|
|
{
|
|
error("%s", msg);
|
|
}
|
|
return u;
|
|
}
|
|
|
|
/***************************************************
|
|
* Parse doc comment embedded between t->ptr and p.
|
|
* Remove trailing blanks and tabs from lines.
|
|
* Replace all newlines with \n.
|
|
* Remove leading comment character from each line.
|
|
* Decide if it's a lineComment or a blockComment.
|
|
* Append to previous one for this token.
|
|
*/
|
|
final void getDocComment(Token* t, uint lineComment)
|
|
{
|
|
/* ct tells us which kind of comment it is: '/', '*', or '+'
|
|
*/
|
|
const ct = t.ptr[2];
|
|
/* Start of comment text skips over / * *, / + +, or / / /
|
|
*/
|
|
const(char)* q = t.ptr + 3; // start of comment text
|
|
const(char)* qend = p;
|
|
if (ct == '*' || ct == '+')
|
|
qend -= 2;
|
|
/* Scan over initial row of ****'s or ++++'s or ////'s
|
|
*/
|
|
for (; q < qend; q++)
|
|
{
|
|
if (*q != ct)
|
|
break;
|
|
}
|
|
/* Remove leading spaces until start of the comment
|
|
*/
|
|
int linestart = 0;
|
|
if (ct == '/')
|
|
{
|
|
while (q < qend && (*q == ' ' || *q == '\t'))
|
|
++q;
|
|
}
|
|
else if (q < qend)
|
|
{
|
|
if (*q == '\r')
|
|
{
|
|
++q;
|
|
if (q < qend && *q == '\n')
|
|
++q;
|
|
linestart = 1;
|
|
}
|
|
else if (*q == '\n')
|
|
{
|
|
++q;
|
|
linestart = 1;
|
|
}
|
|
}
|
|
/* Remove trailing row of ****'s or ++++'s
|
|
*/
|
|
if (ct != '/')
|
|
{
|
|
for (; q < qend; qend--)
|
|
{
|
|
if (qend[-1] != ct)
|
|
break;
|
|
}
|
|
}
|
|
/* Comment is now [q .. qend].
|
|
* Canonicalize it into buf[].
|
|
*/
|
|
OutBuffer buf;
|
|
for (; q < qend; q++)
|
|
{
|
|
char c = *q;
|
|
switch (c)
|
|
{
|
|
case '*':
|
|
case '+':
|
|
if (linestart && c == ct)
|
|
{
|
|
linestart = 0;
|
|
/* Trim preceding whitespace up to preceding \n
|
|
*/
|
|
while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
|
|
buf.offset--;
|
|
continue;
|
|
}
|
|
break;
|
|
case ' ':
|
|
case '\t':
|
|
break;
|
|
case '\r':
|
|
if (q[1] == '\n')
|
|
continue;
|
|
// skip the \r
|
|
goto Lnewline;
|
|
default:
|
|
if (c == 226)
|
|
{
|
|
// If LS or PS
|
|
if (q[1] == 128 && (q[2] == 168 || q[2] == 169))
|
|
{
|
|
q += 2;
|
|
goto Lnewline;
|
|
}
|
|
}
|
|
linestart = 0;
|
|
break;
|
|
Lnewline:
|
|
c = '\n'; // replace all newlines with \n
|
|
goto case;
|
|
case '\n':
|
|
linestart = 1;
|
|
/* Trim trailing whitespace
|
|
*/
|
|
while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
|
|
buf.offset--;
|
|
break;
|
|
}
|
|
buf.writeByte(c);
|
|
}
|
|
/* Trim trailing whitespace (if the last line does not have newline)
|
|
*/
|
|
if (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
|
|
{
|
|
while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
|
|
buf.offset--;
|
|
}
|
|
// Always end with a newline
|
|
if (!buf.offset || buf.data[buf.offset - 1] != '\n')
|
|
buf.writeByte('\n');
|
|
buf.writeByte(0);
|
|
// It's a line comment if the start of the doc comment comes
|
|
// after other non-whitespace on the same line.
|
|
const(char)** dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
|
|
// Combine with previous doc comment, if any
|
|
if (*dc)
|
|
*dc = combineComments(*dc, cast(const(char)*)buf.data);
|
|
else
|
|
*dc = cast(const(char)*)buf.extractData();
|
|
}
|
|
|
|
/********************************************
|
|
* Combine two document comments into one,
|
|
* separated by a newline.
|
|
*/
|
|
final static const(char)* combineComments(const(char)* c1, const(char)* c2)
|
|
{
|
|
//printf("Lexer::combineComments('%s', '%s')\n", c1, c2);
|
|
auto c = c2;
|
|
if (c1)
|
|
{
|
|
c = c1;
|
|
if (c2)
|
|
{
|
|
size_t len1 = strlen(c1);
|
|
size_t len2 = strlen(c2);
|
|
int insertNewLine = 0;
|
|
if (len1 && c1[len1 - 1] != '\n')
|
|
{
|
|
++len1;
|
|
insertNewLine = 1;
|
|
}
|
|
auto p = cast(char*)mem.xmalloc(len1 + 1 + len2 + 1);
|
|
memcpy(p, c1, len1 - insertNewLine);
|
|
if (insertNewLine)
|
|
p[len1 - 1] = '\n';
|
|
p[len1] = '\n';
|
|
memcpy(p + len1 + 1, c2, len2);
|
|
p[len1 + 1 + len2] = 0;
|
|
c = p;
|
|
}
|
|
}
|
|
return c;
|
|
}
|
|
|
|
private:
|
|
final void endOfLine()
|
|
{
|
|
scanloc.linnum++;
|
|
line = p;
|
|
}
|
|
}
|