fix tokenizer

This commit is contained in:
Vadim Lopatin 2016-09-12 16:20:53 +03:00
parent a0594d5922
commit 420812aa3c
5 changed files with 107 additions and 31 deletions

View File

@ -478,7 +478,7 @@
<doXGeneration>1</doXGeneration> <doXGeneration>1</doXGeneration>
<xfilename>$(IntDir)\$(TargetName).json</xfilename> <xfilename>$(IntDir)\$(TargetName).json</xfilename>
<debuglevel>0</debuglevel> <debuglevel>0</debuglevel>
<debugids>DebugInfo DCD</debugids> <debugids>TestParser DebugInfo DCD</debugids>
<versionlevel>0</versionlevel> <versionlevel>0</versionlevel>
<versionids>USE_CONSOLE EmbedStandardResources</versionids> <versionids>USE_CONSOLE EmbedStandardResources</versionids>
<dump_source>0</dump_source> <dump_source>0</dump_source>
@ -691,9 +691,11 @@
</Folder> </Folder>
<Folder name="ddc"> <Folder name="ddc">
<Folder name="lexer"> <Folder name="lexer">
<File path="src\ddc\lexer\ast.d" />
<File path="src\ddc\lexer\exceptions.d" /> <File path="src\ddc\lexer\exceptions.d" />
<File path="src\ddc\lexer\Lexer.d" /> <File path="src\ddc\lexer\Lexer.d" />
<File path="src\ddc\lexer\LexerException.d" /> <File path="src\ddc\lexer\LexerException.d" />
<File path="src\ddc\lexer\parser.d" />
<File path="src\ddc\lexer\textsource.d" /> <File path="src\ddc\lexer\textsource.d" />
<File path="src\ddc\lexer\tokenizer.d" /> <File path="src\ddc\lexer\tokenizer.d" />
</Folder> </Folder>

4
src/ddc/lexer/ast.d Normal file
View File

@ -0,0 +1,4 @@
module ddc.lexer.ast;
/// Base class for abstract syntax tree nodes produced by the parser.
/// Currently an empty stub — no members yet; node kinds to be added as the parser grows.
class ASTNode {
}

46
src/ddc/lexer/parser.d Normal file
View File

@ -0,0 +1,46 @@
module ddc.lexer.parser;
import ddc.lexer.tokenizer;
import ddc.lexer.ast;
import dlangui.core.textsource;
import dlangui.core.logger;
/// Tokenizes the given source text; AST construction is not implemented yet.
/// Params:
///   text = full source text to parse
///   file = source file descriptor used for token position info
/// Returns: AST root node — currently always null (stub), including on tokenizer errors
ASTNode parseSource(dstring text, SourceFile file) {
    ASTNode res;
    import std.array;
    ArraySourceLines lines = new ArraySourceLines();
    dstring[] src = text.split('\n');
    lines.initialize(src, file, 0);
    Tokenizer tokenizer = new Tokenizer(lines);
    //tokenizer.errorTolerant = true;
    try {
        Token[] tokens = tokenizer.allTokens();
        Log.v("tokens: ", tokens);
    } catch (Exception e) {
        // Fix: include the exception message so tokenizer failures are diagnosable
        // (previously only a bare "Tokenizer exception" line was logged).
        Log.e("Tokenizer exception: ", e.msg);
    }
    return res;
}
/// Convenience overload: wraps the file name in a new SourceFile.
ASTNode parseSource(dstring text, string filename) {
    return parseSource(text, new SourceFile(filename));
}
// Everything below is compiled only when built with -debug=TestParser.
debug(TestParser):
/// Parses the given source at Trace log level; result is only logged, not returned.
void testParser(dstring source) {
    Log.setLogLevel(LogLevel.Trace);
    Log.d("Trying to parse\n", source);
    ASTNode res = parseSource(source, "main.d");
}
/// Smoke test: runs the parser over a minimal D program.
void runParserTests() {
    testParser(q{
// testing parser
import std.stdio;
int main(string[]) {
}
    });
}

View File

@ -896,10 +896,13 @@ public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, re
* Token. * Token.
*/ */
class Token { class Token {
protected SourceFile _file; // 32bit 64bit platform
protected int _line; // vtable 4 bytes 8 bytes
protected int _pos; protected SourceFile _file; // 4 bytes 8 bytes
protected TokenType _type; protected int _line; // 4 bytes 4 bytes
protected int _pos; // 4 bytes 4 bytes
protected TokenType _type; // 1 byte 1 byte
// total 17 bytes 25 bytes
/// returns token type /// returns token type
@property TokenType type() { return _type; } @property TokenType type() { return _type; }
/// returns file info for source /// returns file info for source
@ -909,7 +912,7 @@ class Token {
/// returns 1-based source line position of token start /// returns 1-based source line position of token start
@property int pos() { return _pos; } @property int pos() { return _pos; }
/// returns token text /// returns token text
@property dchar[] text() { return null; } @property dstring text() { return null; }
// number token properties // number token properties
@property dchar literalType() { return 0; } @property dchar literalType() { return 0; }
@ -1018,7 +1021,7 @@ class OpToken : Token {
OpCode _op; OpCode _op;
public @property override OpCode opCode() { return _op; } public @property override OpCode opCode() { return _op; }
public @property void opCode(OpCode op) { _op = op; } public @property void opCode(OpCode op) { _op = op; }
public @property override dchar[] text() { return cast(dchar[])getOpNameD(_op); } public @property override dstring text() { return getOpNameD(_op); }
this() { this() {
super(TokenType.OP); super(TokenType.OP);
} }
@ -1039,7 +1042,7 @@ class KeywordToken : Token {
Keyword _keyword; Keyword _keyword;
public @property override Keyword keyword() { return _keyword; } public @property override Keyword keyword() { return _keyword; }
public @property void keyword(Keyword keyword) { _keyword = keyword; } public @property void keyword(Keyword keyword) { _keyword = keyword; }
public @property override dchar[] text() { return cast(dchar[])getKeywordNameD(_keyword); } public @property override dstring text() { return getKeywordNameD(_keyword); }
this() { this() {
super(TokenType.KEYWORD); super(TokenType.KEYWORD);
} }
@ -1058,7 +1061,7 @@ class KeywordToken : Token {
/// comment token /// comment token
class CommentToken : Token { class CommentToken : Token {
protected dchar[] _text; protected dstring _text;
protected bool _isDocumentationComment; protected bool _isDocumentationComment;
protected bool _isMultilineComment; protected bool _isMultilineComment;
@ -1080,14 +1083,14 @@ class CommentToken : Token {
_isMultilineComment = f; _isMultilineComment = f;
} }
@property override dchar[] text() { return _text; } @property override dstring text() { return _text; }
@property void text(dchar[] text) { _text = text; } @property void text(dchar[] text) { _text = cast(dstring)text; }
this() { this() {
super(TokenType.COMMENT); super(TokenType.COMMENT);
} }
this(SourceFile file, uint line, uint pos, dchar[] text) { this(SourceFile file, uint line, uint pos, dchar[] text) {
super(TokenType.COMMENT, file, line, pos); super(TokenType.COMMENT, file, line, pos);
_text = text; _text = cast(dstring)text;
} }
override public Token clone() { override public Token clone() {
CommentToken res = new CommentToken(_file, _line, _pos, _text.dup); CommentToken res = new CommentToken(_file, _line, _pos, _text.dup);
@ -1102,7 +1105,7 @@ class CommentToken : Token {
/// Invalid token holder - for error tolerant parsing /// Invalid token holder - for error tolerant parsing
class InvalidToken : Token { class InvalidToken : Token {
protected dchar[] _text; protected dstring _text;
protected TokenType _invalidTokenType; protected TokenType _invalidTokenType;
protected int _errorCode; protected int _errorCode;
protected string _errorMessage; protected string _errorMessage;
@ -1121,16 +1124,16 @@ class InvalidToken : Token {
@property void invalidTokenType(TokenType t) { _invalidTokenType = t; } @property void invalidTokenType(TokenType t) { _invalidTokenType = t; }
/// text of invalid token /// text of invalid token
@property override dchar[] text() { return _text; } @property override dstring text() { return _text; }
/// text of invalid token /// text of invalid token
@property void text(dchar[] text) { _text = text; } @property void text(dchar[] text) { _text = cast(dstring)text; }
this() { this() {
super(TokenType.INVALID); super(TokenType.INVALID);
} }
this(SourceFile file, uint line, uint pos, dchar[] text) { this(SourceFile file, uint line, uint pos, dchar[] text) {
super(TokenType.INVALID, file, line, pos); super(TokenType.INVALID, file, line, pos);
_text = text; _text = cast(dstring)text;
} }
override Token clone() { override Token clone() {
InvalidToken res = new InvalidToken(_file, _line, _pos, _text.dup); InvalidToken res = new InvalidToken(_file, _line, _pos, _text.dup);
@ -1145,7 +1148,7 @@ class InvalidToken : Token {
} }
alias tokenizer_ident_t = uint; alias tokenizer_ident_t = uint;
alias tokenizer_ident_name_t = dchar[]; alias tokenizer_ident_name_t = dstring;
enum : tokenizer_ident_t { enum : tokenizer_ident_t {
NO_IDENT = 0 NO_IDENT = 0
@ -1191,8 +1194,9 @@ class IdentHolder {
if (found) if (found)
return *found; return *found;
uint newid = _nextId++; uint newid = _nextId++;
_nameToId[cast(dstring)name] = newid; immutable tokenizer_ident_name_t nameCopy = name.dup;
_idToName[newid] = cast(tokenizer_ident_name_t)name; _nameToId[nameCopy] = newid;
_idToName[newid] = nameCopy;
return newid; return newid;
} }
} }
@ -1208,17 +1212,17 @@ static this() {
} }
class StringLiteralToken : Token { class StringLiteralToken : Token {
dchar[] _text; dstring _text;
dchar _literalType; dchar _literalType;
public @property override dchar literalType() { return _literalType; } public @property override dchar literalType() { return _literalType; }
public @property override dchar[] text() { return _text; } public @property override dstring text() { return _text; }
public void setText(dchar[] text, dchar type) { _text = text; _literalType = type; } public void setText(dchar[] text, dchar type) { _text = cast(dstring)text; _literalType = type; }
this() { this() {
super(TokenType.STRING); super(TokenType.STRING);
} }
this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) { this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) {
super(TokenType.STRING, file, line, pos); super(TokenType.STRING, file, line, pos);
_text = text; _text = cast(dstring)text;
_literalType = type; _literalType = type;
} }
override public Token clone() { override public Token clone() {
@ -1234,7 +1238,7 @@ class CharacterLiteralToken : Token {
dchar _literalType; dchar _literalType;
@property override dchar literalType() { return _literalType; } @property override dchar literalType() { return _literalType; }
@property dchar character() { return _character; } @property dchar character() { return _character; }
@property override dchar[] text() { return [_character]; } @property override dstring text() { return [_character]; }
void setCharacter(dchar ch, dchar type) { _character = ch; _literalType = type; } void setCharacter(dchar ch, dchar type) { _character = ch; _literalType = type; }
this() { this() {
super(TokenType.CHARACTER); super(TokenType.CHARACTER);
@ -1259,7 +1263,7 @@ class IntegerLiteralToken : Token {
public @property override ulong intValue() { return _value; } public @property override ulong intValue() { return _value; }
public @property override bool isUnsigned() { return _unsigned; } public @property override bool isUnsigned() { return _unsigned; }
public @property override ulong isLong() { return _long; } public @property override ulong isLong() { return _long; }
public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); } public @property override dstring text() { return to!dstring(_value); }
public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) { public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) {
_value = value; _value = value;
_unsigned = unsignedFlag; _unsigned = unsignedFlag;
@ -1296,7 +1300,7 @@ class RealLiteralToken : Token {
public @property override float floatValue() { return cast(float)_value; } public @property override float floatValue() { return cast(float)_value; }
public @property override byte precision() { return _precision; } public @property override byte precision() { return _precision; }
public @property override bool isImaginary() { return _imaginary; } public @property override bool isImaginary() { return _imaginary; }
public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); } public @property override dstring text() { return to!dstring(_value); }
public void setValue(real value, byte precision = 1, bool imaginary = false) { public void setValue(real value, byte precision = 1, bool imaginary = false) {
_value = value; _value = value;
_precision = precision; _precision = precision;
@ -1325,14 +1329,18 @@ class RealLiteralToken : Token {
class IdentToken : Token { class IdentToken : Token {
tokenizer_ident_t _id; tokenizer_ident_t _id;
public @property override dchar[] text() { return identMap.nameById(_id); } public @property override dstring text() {
public void setText(dchar[] text) { _id = identMap.idByName(text); } return identMap.nameById(_id);
}
public void setText(dchar[] text) {
_id = identMap.idByName(cast(immutable)text);
}
this() { this() {
super(TokenType.IDENTIFIER); super(TokenType.IDENTIFIER);
} }
this(SourceFile file, uint line, uint pos, dchar[] text) { this(SourceFile file, uint line, uint pos, dchar[] text) {
super(TokenType.IDENTIFIER, file, line, pos); super(TokenType.IDENTIFIER, file, line, pos);
_id = identMap.idByName(text); _id = identMap.idByName(cast(immutable)text);
} }
this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) { this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) {
super(TokenType.IDENTIFIER, file, line, pos); super(TokenType.IDENTIFIER, file, line, pos);
@ -2714,7 +2722,7 @@ class Tokenizer
if (ch == EOF_CHAR) { if (ch == EOF_CHAR) {
return emitEof(); return emitEof();
} }
if (ch == EOL_CHAR || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) { if (ch == '\r' || ch == '\n' || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) {
// white space (treat EOL as whitespace, too) // white space (treat EOL as whitespace, too)
return processWhiteSpace(ch); return processWhiteSpace(ch);
} }
@ -2786,7 +2794,18 @@ class Tokenizer
return parserError("Invalid token", _line, _pos); return parserError("Invalid token", _line, _pos);
} }
/// Tokenizes the entire remaining input, collecting every token up to
/// (but not including) EOF; stops early if nextToken() returns null.
Token[] allTokens() {
Token[] res;
// mark the array safe for in-place appends (no other slices share it yet)
res.assumeSafeAppend;
for(;;) {
Token tok = nextToken();
// null token or explicit EOF terminates the scan
if (!tok || tok.type == TokenType.EOF)
break;
// clone() before storing — presumably nextToken() reuses internal token
// instances between calls; confirm against nextToken()'s implementation
res ~= tok.clone();
}
return res;
}
} }
unittest { unittest {

View File

@ -13,6 +13,11 @@ mixin APP_ENTRY_POINT;
/// entry point for dlangui based application /// entry point for dlangui based application
extern (C) int UIAppMain(string[] args) { extern (C) int UIAppMain(string[] args) {
debug(TestParser) {
import ddc.lexer.parser;
runParserTests();
}
version(Windows) { version(Windows) {
debug { debug {
sharedLog = new FileLogger("dcd.log"); sharedLog = new FileLogger("dcd.log");