fix tokenizer; fix syntax highlight

Vadim Lopatin 2015-01-20 15:59:11 +03:00
parent d580165beb
commit 1c514145b6
3 changed files with 114 additions and 81 deletions

View File

@@ -867,7 +867,7 @@ public dstring getKeywordNameD(Keyword keyword) pure nothrow {
     return KEYWORD_STRINGS[keyword];
 };
-public Keyword findKeyword(Keyword start, Keyword end, dchar * name, uint len, ref uint pos) pure nothrow {
+public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow {
     for (Keyword i = start; i <= end; i++) {
         dstring s = KEYWORD_STRINGS[i];
         if (s.length > len + 1)
@@ -894,13 +894,13 @@ public Keyword findKeyword(Keyword start, Keyword end, dchar * name, uint len, ref uint pos) pure nothrow {
  */
 class Token {
     protected SourceFile _file;
-    protected uint _line;
-    protected uint _pos;
+    protected int _line;
+    protected int _pos;
     protected TokenType _type;
     public @property TokenType type() { return _type; }
     public @property string filename() { return _file.filename; }
-    public @property uint line() { return _line; }
-    public @property uint pos() { return _pos; }
+    public @property int line() { return _line; }
+    public @property int pos() { return _pos; }
     public @property dchar[] text() { return null; }
     public @property dchar literalType() { return 0; }
     public @property ulong intValue() { return 0; }
@@ -918,14 +918,14 @@ class Token {
         _type = type;
     }
-    this(TokenType type, SourceFile file, uint line, uint pos) {
+    this(TokenType type, SourceFile file, int line, int pos) {
         _type = type;
         _file = file;
         _line = line;
         _pos = pos;
     }
-    void setPos(SourceFile file, uint line, uint pos) {
+    void setPos(SourceFile file, int line, int pos) {
         _file = file;
         _line = line;
         _pos = pos + 1;
@@ -935,7 +935,7 @@ class Token {
         _file = file;
     }
-    void setPos(uint line, uint pos) {
+    void setPos(int line, int pos) {
         _line = line;
         _pos = pos + 1;
     }
@@ -1260,9 +1260,10 @@ class Tokenizer
 {
     SourceLines _lineStream;
     dchar[] _lineText;
-    uint _line; // current line number
-    uint _len; // current line length
-    uint _pos; // current line read position
+    int _line; // current line number
+    int _len; // current line length
+    int _pos; // current line read position
+    int _prevLineLength; // previous line length
     uint _state; // tokenizer state
     enum : int {
@@ -1304,6 +1305,7 @@ class Tokenizer
         buildTime = Clock.currTime();
         _line = lineStream.line;
         _pos = 0;
+        _prevLineLength = 0;
         _lineText = null;
     }
@@ -1313,17 +1315,22 @@ class Tokenizer
     // fetch next line from source stream
     bool nextLine() {
+        _prevLineLength = _lineText.length;
         _lineText = _lineStream.readLine();
-        if (_lineText is null) {
+        if (!_lineText) {
             if (_lineStream.errorCode != 0)
                 throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file.filename, _lineStream.errorLine, _lineStream.errorPos);
-            _pos = 0;
-            _len = 0;
-            return false;
+            if (_lineStream.eof) {
+                // end of file
+                _pos = 0;
+                _len = 0;
+                return false;
+            }
+            // just an empty line
         }
         _line = _lineStream.line;
         _pos = 0;
-        _len = cast(uint)_lineText.length; // do not support lines longer that 4Gb
+        _len = cast(int)_lineText.length; // do not support lines longer that 4Gb
         return true;
     }
@@ -1358,13 +1365,17 @@ class Tokenizer
     }
     Token processWhiteSpace(dchar firstChar) {
-        uint line = _line;
-        uint pos = _pos - 1;
+        // reuse the same token instance, to avoid extra heap spamming
+        if (_pos == 0) {
+            _sharedWhiteSpaceToken.setPos(_line - 1, _prevLineLength);
+        } else {
+            _sharedWhiteSpaceToken.setPos(_line, _pos - 1);
+        }
         for (;;) {
-            uint i = _pos;
+            int i = _pos;
             for (; i < _len; i++) {
                 dchar ch = _lineText[i];
-                if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C))
+                if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR))
                     break;
             }
             _pos = i;
@@ -1374,8 +1385,6 @@ class Tokenizer
             if (!nextLine())
                 break;
         }
-        // reuse the same token instance, to avoid extra heap spamming
-        _sharedWhiteSpaceToken.setPos(line, pos);
         return _sharedWhiteSpaceToken;
     }
@@ -1388,21 +1397,30 @@ class Tokenizer
         return _sharedCommentToken;
     }
+    Token processOneLineSharpComment() {
+        _sharedCommentToken.setPos(_line, _pos - 1);
+        if (_enableCommentText) {
+            _sharedCommentToken.text = _lineText[_pos .. $];
+        }
+        _pos = _len;
+        return _sharedCommentToken;
+    }
     // Comment /* */
     Token processMultilineComment() {
         _sharedCommentToken.setPos(_line, _pos - 1);
         _commentAppender.reset();
-        uint textStart = _pos + 1;
+        int textStart = _pos + 1;
         for (;;) {
-            uint textEnd = uint.max;
-            uint i = textStart;
+            int textEnd = int.max;
+            int i = textStart;
             for (; i < _len - 1; i++) {
                 if (_lineText[i] == '*' && _lineText[i + 1] == '/') {
                     textEnd = i;
                     break;
                 }
             }
-            if (textEnd != uint.max) {
+            if (textEnd != int.max) {
                 if (_enableCommentText)
                     _commentAppender.append(_lineText[textStart .. textEnd]);
                 _pos = textEnd + 2;
@@ -1426,11 +1444,11 @@ class Tokenizer
         _sharedCommentToken.setPos(_line, _pos - 1);
         _commentAppender.reset();
         dchar[] text;
-        uint textStart = _pos + 1;
+        int textStart = _pos + 1;
         int level = 1;
         for (;;) {
-            uint textEnd = uint.max;
-            uint i = textStart;
+            int textEnd = int.max;
+            int i = textStart;
             for (; i < _len - 1; i++) {
                 if (_lineText[i] == '/' && _lineText[i + 1] == '+') {
                     level++;
@@ -1442,7 +1460,7 @@ class Tokenizer
                     }
                 }
             }
-            if (textEnd != uint.max) {
+            if (textEnd != int.max) {
                 if (_enableCommentText)
                     _commentAppender.append(_lineText[textStart .. textEnd]);
                 _pos = textEnd + 2;
@@ -1485,9 +1503,9 @@ class Tokenizer
     Token processIdent() {
         _sharedIdentToken.setPos(_line, _pos - 1);
         _identAppender.reset();
-        uint startPos = _pos - 1;
-        uint endPos = _len;
-        for (uint i = _pos; i < _len; i++) {
+        int startPos = _pos - 1;
+        int endPos = _len;
+        for (int i = _pos; i < _len; i++) {
             dchar ch = _lineText[i];
             if (!isIdentMiddleChar(ch)) {
                 endPos = i;
@@ -1535,7 +1553,7 @@ class Tokenizer
             parserError("Unexpected end of line in binary number");
         int digits = 0;
         ulong number = 0;
-        uint i = _pos;
+        int i = _pos;
         for (;i < _len; i++) {
             dchar ch = _lineText[i];
             if (ch != '0' && ch != '1')
@@ -1558,7 +1576,7 @@ class Tokenizer
             parserError("Unexpected end of line in hex number");
         int digits = 0;
         ulong number = 0;
-        uint i = _pos;
+        int i = _pos;
         for (;i < _len; i++) {
             dchar ch = _lineText[i];
             uint digit = 0;
@@ -1588,11 +1606,11 @@ class Tokenizer
             parserError("Unexpected end of line in octal number");
         int digits = 0;
         ulong number = 0;
-        uint i = _pos;
+        int i = _pos;
         bool overflow = false;
         for (;i < _len; i++) {
             dchar ch = _lineText[i];
-            uint digit = 0;
+            int digit = 0;
             if (ch >= '0' && ch <= '7')
                 digit = ch - '0';
             else if (ch == '_')
@@ -1637,7 +1655,7 @@ class Tokenizer
             parserError("Invalid exponent");
         ulong digits = 0;
         ulong number = 0;
-        uint i = _pos;
+        int i = _pos;
         bool overflow = false;
         for (;i < _len; i++) {
             dchar ch = _lineText[i];
@@ -1672,7 +1690,7 @@ class Tokenizer
         }
         ulong divider = 1;
         ulong number = 0;
-        uint i = _pos;
+        int i = _pos;
         bool overflow = false;
         for (;i < _len; i++) {
             dchar ch = _lineText[i];
@@ -1712,7 +1730,7 @@ class Tokenizer
             parserError("Unexpected end of line in number");
         int digits = 0;
         ulong number = 0;
-        uint i = _pos;
+        int i = _pos;
         bool overflow = false;
         for (;i < _len; i++) {
             dchar ch = _lineText[i];
@@ -1754,7 +1772,7 @@ class Tokenizer
     Keyword detectKeyword(dchar ch) {
         if (ch > 'z')
             return Keyword.NONE;
-        uint len = _len - _pos;
+        int len = _len - _pos;
         switch (cast(ubyte)ch) {
             // ABSTRACT,
             // ALIAS,
@@ -2188,15 +2206,15 @@ class Tokenizer
         }
         dchar type = 0;
         for (;;) {
-            uint i = _pos;
-            uint endPos = uint.max;
+            int i = _pos;
+            int endPos = int.max;
             for(; i < _len; i++) {
                 if (_lineText[i] == delimiter && (i == 0 || _lineText[i - 1] != '\\')) {
                     endPos = i;
                     break;
                 }
             }
-            if (endPos != uint.max) {
+            if (endPos != int.max) {
                 // found end quote
                 _stringLiteralAppender.append(_lineText[_pos .. endPos]);
                 _pos = endPos + 1;
@@ -2256,13 +2274,13 @@ class Tokenizer
     static immutable dstring VERSION = "0.1";
     static immutable dstring VENDOR = "coolreader.org";
-    Token makeSpecialTokenString(dstring str, uint pos) {
+    Token makeSpecialTokenString(dstring str, int pos) {
         _sharedStringLiteralToken.setPos(_line, pos);
         _sharedStringLiteralToken.setText(cast(dchar[])str, 0);
         return _sharedStringLiteralToken;
     }
-    Token processSpecialToken(Keyword keyword, uint pos) {
+    Token processSpecialToken(Keyword keyword, int pos) {
         switch (keyword) {
             //Special Token    Replaced with
             case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy"
@@ -2300,6 +2318,8 @@ class Tokenizer
             else if (next == '+')
                 return processNestedComment();
         }
+        if (ch == '#' && _line == 1)
+            return processOneLineSharpComment();
         if (ch == '\"')
             return processDoubleQuotedOrWysiwygString(ch);
         if (ch == 'x' && next == '\"')
@@ -2308,7 +2328,7 @@ class Tokenizer
             return processDelimitedString();
         if ((ch == 'r' && next == '\"') || (ch == '`'))
             return processDoubleQuotedOrWysiwygString(ch);
-        uint oldPos = _pos - 1;
+        int oldPos = _pos - 1;
         if (ch == '0') {
             if (next == 'b' || next == 'B')
@@ -2361,9 +2381,9 @@
 unittest {
     import std.algorithm;
     class TokenTest {
-        uint _line;
+        int _line;
         string _file;
-        this(string file, uint line) {
+        this(string file, int line) {
             _file = file;
             _line = line;
         }
@@ -2382,7 +2402,7 @@ unittest {
     }
     void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) {
         Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line));
-        for (uint i = 0; i < tokens.length; i++) {
+        for (int i = 0; i < tokens.length; i++) {
             tokens[i].execute(tokenizer);
         }
     }
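
A quick sketch of what the new '#' handling is meant to do (a hypothetical unittest, not part of the commit; it reuses the Tokenizer constructor, nextToken() and TokenType shown in the diff above):

unittest {
    // assumption: a '#' at the start of line 1, as in a "#!/usr/bin/rdmd"
    // script line, is consumed by processOneLineSharpComment()
    // and comes back as a single one-line comment token
    Tokenizer tokenizer = new Tokenizer("#!/usr/bin/rdmd\nint a;", "shebangTest");
    Token t = tokenizer.nextToken();
    assert(t.type == TokenType.COMMENT);
}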

View File

@@ -31,11 +31,15 @@ interface SourceLines {
     @property int errorLine();
     /// error position
     @property int errorPos();
+    /// end of file reached
+    @property bool eof();
     /// read line, return null if EOF reached or error occured
     dchar[] readLine();
 }
 
+const TEXT_SOURCE_ERROR_EOF = 1;
+
 /// Simple text source based on array
 class ArraySourceLines : SourceLines {
     protected SourceFile _file;
@@ -75,6 +79,10 @@ class ArraySourceLines : SourceLines {
         return true;
     }
+    /// end of file reached
+    override @property bool eof() {
+        return _line >= _lines.length;
+    }
     /// source file
     override @property SourceFile file() { return _file; }
     /// last read line
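
The new eof property is what lets Tokenizer.nextLine() above tell an empty source line apart from the end of the file, since readLine() reports both as an empty buffer. A minimal caller sketch (hypothetical, not part of the commit; assumes a SourceLines instance named lineStream):

dchar[] lineText = lineStream.readLine();
if (!lineText) {
    if (lineStream.errorCode != 0) {
        // encoding error: report via errorMessage / errorLine / errorPos
    } else if (lineStream.eof) {
        // genuine end of file: stop reading
    } else {
        // just an empty line in the middle of the file: keep going
    }
}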

View File

@@ -40,52 +40,56 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter {
     /// categorize characters in content by token types
     void updateHighlight(dstring[] lines, TokenPropString[] props, int changeStartLine, int changeEndLine) {
+        Log.d("updateHighlight");
+        long ms0 = currentTimeMillis();
         _props = props;
         changeStartLine = 0;
         changeEndLine = lines.length;
         _lines.init(lines[changeStartLine..$], _file, changeStartLine);
         _tokenizer.init(_lines);
-        uint tokenPos = 0;
-        uint tokenLine = 0;
+        int tokenPos = 0;
+        int tokenLine = 0;
         ubyte category = 0;
         for (;;) {
             Token token = _tokenizer.nextToken();
             if (token is null) {
-                //writeln("Null token returned");
+                //Log.d("Null token returned");
                 break;
             }
             if (token.type == TokenType.EOF) {
-                //writeln("EOF token");
+                //Log.d("EOF token");
                 break;
             }
             uint newPos = token.pos - 1;
             uint newLine = token.line - 1;
-            //if (category) {
-                // fill with category
-                for (uint i = tokenLine; i <= newLine; i++) {
-                    uint start = i > tokenLine ? 0 : tokenPos;
-                    uint end = i < newLine ? lines[i].length : tokenPos;
-                    for (uint j = start; j < end; j++) {
-                        assert(i < _props.length);
-                        if (j - 1 < _props[i].length)
-                            _props[i][j - 1] = category;
-                    }
-                }
-            //}
-            TokenType t = token.type;
+            //Log.d("", token.line, ":", token.pos, "\t", tokenLine + 1, ":", tokenPos + 1, "\t", token.toString);
+            // fill with category
+            for (int i = tokenLine; i <= newLine; i++) {
+                int start = i > tokenLine ? 0 : tokenPos;
+                int end = i < newLine ? lines[i].length : newPos;
+                for (int j = start; j < end; j++)
+                    _props[i][j] = category;
+            }
             // handle token - convert to category
-            if (t == TokenType.COMMENT) {
-                category = TokenCategory.Comment;
-            } else if (t == TokenType.KEYWORD) {
-                category = TokenCategory.Keyword;
-            } else if (t == TokenType.IDENTIFIER) {
-                category = TokenCategory.Identifier;
-            } else if (t == TokenType.STRING) {
-                category = TokenCategory.String;
-            } else {
-                category = 0;
+            switch(token.type) {
+                case TokenType.COMMENT:
+                    category = TokenCategory.Comment;
+                    break;
+                case TokenType.KEYWORD:
+                    category = TokenCategory.Keyword;
+                    break;
+                case TokenType.IDENTIFIER:
+                    category = TokenCategory.Identifier;
+                    break;
+                case TokenType.STRING:
+                    category = TokenCategory.String;
+                    break;
+                default:
+                    category = 0;
+                    break;
             }
             tokenPos = newPos;
             tokenLine= newLine;
@@ -93,6 +97,7 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter {
         }
         _lines.close();
         _props = null;
+        Log.d("updateHighlight took ", currentTimeMillis() - ms0, "ms");
     }
 }
@@ -102,10 +107,10 @@ class DSourceEdit : SourceEdit {
         super(ID);
         styleId = null;
         backgroundColor = 0xFFFFFF;
-        setTokenHightlightColor(TokenCategory.Comment, 0x808080); // gray
-        setTokenHightlightColor(TokenCategory.Keyword, 0x0020C0); // blue
-        setTokenHightlightColor(TokenCategory.String, 0xC02000); // red
-        setTokenHightlightColor(TokenCategory.Identifier, 0x206000); // green
+        setTokenHightlightColor(TokenCategory.Comment, 0x008000); // green
+        setTokenHightlightColor(TokenCategory.Keyword, 0x0000FF); // blue
+        setTokenHightlightColor(TokenCategory.String, 0xA31515); // red
+        //setTokenHightlightColor(TokenCategory.Identifier, 0x206000); // no colors
     }
     this() {
         this("SRCEDIT");