mirror of https://github.com/buggins/dlangide.git
error tolerant tokenizer
This commit is contained in:
parent
c06627937d
commit
9f7c4c5274
|
@ -1,10 +0,0 @@
|
|||
module ddc.lexer.SourceEncodingException;
|
||||
|
||||
class SourceEncodingException : Exception
|
||||
{
|
||||
this(string msg)
|
||||
{
|
||||
super(msg);
|
||||
}
|
||||
}
|
||||
|
|
@ -2,31 +2,36 @@ module ddc.lexer.exceptions;
|
|||
|
||||
import std.conv;
|
||||
|
||||
import ddc.lexer.textsource;
|
||||
|
||||
class ParserException : Exception {
|
||||
string _msg;
|
||||
string _filename;
|
||||
size_t _line;
|
||||
size_t _pos;
|
||||
protected string _msg;
|
||||
protected SourceFile _file;
|
||||
protected int _line;
|
||||
protected int _pos;
|
||||
|
||||
public @property size_t line() { return _line; }
|
||||
@property SourceFile file() { return _file; }
|
||||
@property string msg() { return _msg; }
|
||||
@property int line() { return _line; }
|
||||
@property int pos() { return _pos; }
|
||||
|
||||
this(string msg, string filename, size_t line, size_t pos) {
|
||||
super(msg ~ " at " ~ filename ~ " line " ~ to!string(line) ~ " column " ~ to!string(pos));
|
||||
this(string msg, SourceFile file, int line, int pos) {
|
||||
super(msg ~ " at " ~ file.toString ~ " line " ~ to!string(line) ~ " column " ~ to!string(pos));
|
||||
_msg = msg;
|
||||
_filename = filename;
|
||||
_file = file;
|
||||
_line = line;
|
||||
_pos = pos;
|
||||
}
|
||||
}
|
||||
|
||||
class LexerException : ParserException {
|
||||
this(string msg, string filename, size_t line, size_t pos) {
|
||||
super(msg, filename, line, pos);
|
||||
this(string msg, SourceFile file, int line, int pos) {
|
||||
super(msg, file, line, pos);
|
||||
}
|
||||
}
|
||||
|
||||
class SourceEncodingException : LexerException {
|
||||
this(string msg, string filename, size_t line, size_t pos) {
|
||||
super(msg, filename, line, pos);
|
||||
this(string msg, SourceFile file, int line, int pos) {
|
||||
super(msg, file, line, pos);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,10 +8,13 @@ private import std.array;
|
|||
* Even if it contains only the file name, it is better to use this class instead of a string - an object reference is half the size of an array reference.
|
||||
*/
|
||||
class SourceFile {
|
||||
protected string _file;
|
||||
public @property string filename() { return _file; }
|
||||
protected string _filename;
|
||||
@property string filename() { return _filename; }
|
||||
public this(string filename) {
|
||||
_file = filename;
|
||||
_filename = filename;
|
||||
}
|
||||
override @property string toString() {
|
||||
return _filename;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -900,11 +900,18 @@ class Token {
|
|||
protected int _line;
|
||||
protected int _pos;
|
||||
protected TokenType _type;
|
||||
/// returns token type
|
||||
@property TokenType type() { return _type; }
|
||||
@property string filename() { return _file.filename; }
|
||||
/// returns file info for source
|
||||
@property SourceFile filename() { return _file; }
|
||||
/// returns 1-based source line number of token start
|
||||
@property int line() { return _line; }
|
||||
/// returns 1-based column of token start within its source line
|
||||
@property int pos() { return _pos; }
|
||||
/// returns token text
|
||||
@property dchar[] text() { return null; }
|
||||
|
||||
// number token properties
|
||||
@property dchar literalType() { return 0; }
|
||||
@property ulong intValue() { return 0; }
|
||||
@property bool isUnsigned() { return false; }
|
||||
|
@ -914,10 +921,25 @@ class Token {
|
|||
@property float floatValue() { return 0; }
|
||||
@property byte precision() { return 0; }
|
||||
@property bool isImaginary() { return false; }
|
||||
|
||||
/// returns opcode ID - for opcode tokens
|
||||
@property OpCode opCode() { return OpCode.NONE; }
|
||||
/// returns keyword ID - for keyword tokens
|
||||
@property Keyword keyword() { return Keyword.NONE; }
|
||||
/// returns true if this is documentation comment token
|
||||
@property bool isDocumentationComment() { return false; }
|
||||
|
||||
// error handling
|
||||
|
||||
/// returns true if this is an invalid token (can be returned in the error tolerant mode of the tokenizer)
|
||||
@property bool isError() { return type == TokenType.INVALID; }
|
||||
/// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
|
||||
@property string errorMessage() { return null; }
|
||||
/// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
|
||||
@property int errorCode() { return 0; }
|
||||
/// returns the type of token whose parsing failed - if this is an invalid token (can be returned in the error tolerant mode of the tokenizer)
|
||||
@property TokenType invalidTokenType() { return TokenType.INVALID; }
|
||||
|
||||
|
||||
this(TokenType type) {
|
||||
_type = type;
|
||||
|
@ -929,17 +951,17 @@ class Token {
|
|||
_line = line;
|
||||
_pos = pos;
|
||||
}
|
||||
|
||||
/// set start position for token (line is 1-based, pos is 0-based)
|
||||
void setPos(SourceFile file, int line, int pos) {
|
||||
_file = file;
|
||||
_line = line;
|
||||
_pos = pos + 1;
|
||||
}
|
||||
|
||||
/// set source file information for token
|
||||
void setFile(SourceFile file) {
|
||||
_file = file;
|
||||
}
|
||||
|
||||
/// set start position for token (line is 1-based, pos is 0-based)
|
||||
void setPos(int line, int pos) {
|
||||
_line = line;
|
||||
_pos = pos + 1;
|
||||
|
@ -974,6 +996,7 @@ class EofToken : Token {
|
|||
// }
|
||||
//}
|
||||
|
||||
/// white space token
|
||||
class WhiteSpaceToken : Token {
|
||||
this() {
|
||||
super(TokenType.WHITESPACE);
|
||||
|
@ -1027,7 +1050,7 @@ class KeywordToken : Token {
|
|||
}
|
||||
}
|
||||
|
||||
// do we need comment text?
|
||||
/// comment token
|
||||
class CommentToken : Token {
|
||||
protected dchar[] _text;
|
||||
protected bool _isDocumentationComment;
|
||||
|
@ -1060,9 +1083,28 @@ class CommentToken : Token {
|
|||
/// Invalid token holder - for error tolerant parsing
|
||||
class InvalidToken : Token {
|
||||
protected dchar[] _text;
|
||||
protected TokenType _invalidTokenType;
|
||||
protected int _errorCode;
|
||||
protected string _errorMessage;
|
||||
|
||||
/// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
|
||||
override @property string errorMessage() { return _errorMessage; }
|
||||
/// sets error message
|
||||
@property void errorMessage(string s) { _errorMessage = s; }
|
||||
/// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
|
||||
override @property int errorCode() { return _errorCode; }
|
||||
/// sets error code
|
||||
@property void errorCode(int c) { _errorCode = c; }
|
||||
/// returns the type of token whose parsing failed - if this is an invalid token (can be returned in the error tolerant mode of the tokenizer)
|
||||
override @property TokenType invalidTokenType() { return _invalidTokenType; }
|
||||
/// sets type of token parsing of which has been failed
|
||||
@property void invalidTokenType(TokenType t) { _invalidTokenType = t; }
|
||||
|
||||
/// text of invalid token
|
||||
@property override dchar[] text() { return _text; }
|
||||
/// text of invalid token
|
||||
@property void text(dchar[] text) { _text = text; }
|
||||
|
||||
this() {
|
||||
super(TokenType.INVALID);
|
||||
}
|
||||
|
@ -1071,7 +1113,11 @@ class InvalidToken : Token {
|
|||
_text = text;
|
||||
}
|
||||
override Token clone() {
|
||||
return new InvalidToken(_file, _line, _pos, _text.dup);
|
||||
InvalidToken res = new InvalidToken(_file, _line, _pos, _text.dup);
|
||||
res._errorMessage = _errorMessage.dup;
|
||||
res._errorCode = _errorCode;
|
||||
res._invalidTokenType = _invalidTokenType;
|
||||
return res;
|
||||
}
|
||||
override @property string toString() {
|
||||
return "Invalid:" ~ to!string(_text);
|
||||
|
@ -1372,7 +1418,7 @@ class Tokenizer
|
|||
_lineText = _lineStream.readLine();
|
||||
if (!_lineText) {
|
||||
if (_lineStream.errorCode != 0)
|
||||
throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file.filename, _lineStream.errorLine, _lineStream.errorPos);
|
||||
throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos);
|
||||
if (_lineStream.eof) {
|
||||
// end of file
|
||||
_pos = 0;
|
||||
|
@ -1822,16 +1868,38 @@ class Tokenizer
|
|||
}
|
||||
|
||||
/// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
|
||||
protected Token parserError(string msg, Token incompleteToken, dchar currentChar = 0) {
|
||||
return parserError(msg, incompleteToken.line, incompleteToken.pos, currentChar);
|
||||
protected Token parserError(string msg, Token incompleteToken) {
|
||||
return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type);
|
||||
}
|
||||
/// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
|
||||
protected Token parserError(string msg, int startLine, int startPos, dchar currentChar = 0) {
|
||||
protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) {
|
||||
if (_errorTolerant) {
|
||||
startPos--;
|
||||
_sharedInvalidToken.setPos(startLine, startPos);
|
||||
_sharedInvalidToken.errorMessage = msg;
|
||||
_sharedInvalidToken.errorCode = 1; // for future extension
|
||||
_sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension
|
||||
// make invalid source text
|
||||
dchar[] invalidText;
|
||||
int p = startLine == _line ? startPos : 0;
|
||||
for (int i = p; i < _pos && i < _lineText.length; i++)
|
||||
invalidText ~= _lineText[i];
|
||||
|
||||
// recover after error
|
||||
for (; _pos < _lineText.length; _pos++) {
|
||||
dchar ch = _lineText[_pos];
|
||||
if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}')
|
||||
break;
|
||||
if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) {
|
||||
if (ch == '*' || ch == '/')
|
||||
break;
|
||||
}
|
||||
invalidText ~= ch;
|
||||
}
|
||||
_sharedInvalidToken.text = invalidText;
|
||||
return _sharedInvalidToken;
|
||||
}
|
||||
throw new ParserException(msg, _lineStream.file.filename, _line, _pos);
|
||||
throw new ParserException(msg, _lineStream.file, _line, _pos);
|
||||
}
|
||||
|
||||
protected Keyword detectKeyword(dchar ch) {
|
||||
|
|
|
@ -96,10 +96,27 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter {
|
|||
category = TokenCategory.Integer;
|
||||
break;
|
||||
case TokenType.FLOAT:
|
||||
category = TokenCategory.FLoat;
|
||||
category = TokenCategory.Float;
|
||||
break;
|
||||
case TokenType.INVALID:
|
||||
category = TokenCategory.Error;
|
||||
switch (token.invalidTokenType) {
|
||||
case TokenType.IDENTIFIER:
|
||||
category = TokenCategory.Error_InvalidIdentifier;
|
||||
break;
|
||||
case TokenType.STRING:
|
||||
category = TokenCategory.Error_InvalidString;
|
||||
break;
|
||||
case TokenType.COMMENT:
|
||||
category = TokenCategory.Error_InvalidComment;
|
||||
break;
|
||||
case TokenType.FLOAT:
|
||||
case TokenType.INTEGER:
|
||||
category = TokenCategory.Error_InvalidNumber;
|
||||
break;
|
||||
default:
|
||||
category = TokenCategory.Error;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
category = 0;
|
||||
|
|
Loading…
Reference in New Issue