Syntax highlighting, continued

This commit is contained in:
Vadim Lopatin 2015-01-19 22:35:44 +03:00
parent ecfafab3bc
commit fd763e525a
9 changed files with 3774 additions and 2 deletions

View File

@ -66,7 +66,7 @@
<debuglevel>0</debuglevel>
<debugids />
<versionlevel>0</versionlevel>
<versionids>Unicode</versionids>
<versionids>Unicode USE_SDL USE_OPENGL</versionids>
<dump_source>0</dump_source>
<mapverbosity>3</mapverbosity>
<createImplib>0</createImplib>
@ -189,6 +189,17 @@
<filesToClean>*.obj;*.cmd;*.build;*.json;*.dep</filesToClean>
</Config>
<Folder name="dlangide">
<Folder name="ddc">
<Folder name="lexer">
<File path="src\ddc\lexer\exceptions.d" />
<File path="src\ddc\lexer\Lexer.d" />
<File path="src\ddc\lexer\LexerException.d" />
<File path="src\ddc\lexer\LineStream.d" />
<File path="src\ddc\lexer\SourceEncodingException.d" />
<File path="src\ddc\lexer\textsource.d" />
<File path="src\ddc\lexer\Tokenizer.d" />
</Folder>
</Folder>
<Folder name="dlangide">
<Folder name="ui">
<File path="src\dlangide\ui\commands.d" />

288
src/ddc/lexer/Lexer.d Normal file
View File

@ -0,0 +1,288 @@
// D grammar - according to http://dlang.org/grammar
module ddc.lexer.Lexer;
import ddc.lexer.LineStream;
import ddc.lexer.Tokenizer;
/** Lexem type constants */
enum LexemType : ushort {
UNKNOWN, // lexem kind not yet determined (reported by the base Lexem class)
// types
TYPE, // full type (see class Type)
TYPE_CTORS, // sequence of type constructors (see class TypeCtors)
TYPE_CTOR, // single type constructor keyword: const/immutable/inout/shared (see class TypeCtor)
BASIC_TYPE, // basic type alternative (see class BasicType)
BASIC_TYPE_X, // basic type keyword such as int, char, void (see class BasicTypeX)
BASIC_TYPE_2,
IDENTIFIER_LIST, // dot-separated identifier / template-instance chain (see class IdentifierList)
IDENTIFIER, // single identifier token (see class Identifier)
TYPEOF, // typeof(Expression) / typeof(return) (see class Typeof)
// templates
TEMPLATE_INSTANCE, // Identifier TemplateArguments (see class TemplateInstance)
EXPRESSION, // expression placeholder (see class Expression)
ALT_DECLARATOR, // alternate declarator placeholder (see class AltDeclarator)
}
/// Base class for all lexems produced by the lexer.
/// Subclasses override `type` to report their concrete LexemType.
class Lexem {
    /// Lexem kind; the base class reports LexemType.UNKNOWN.
    public @property LexemType type() {
        return LexemType.UNKNOWN;
    }
}
/**
Returns true for one of keywords: bool, byte, ubyte, short, ushort, int, uint, long, ulong,
char, wchar, dchar, float, double, real, ifloat, idouble, ireal, cfloat, cdouble, creal, void
*/
bool isBasicTypeXToken(Token token) {
    if (token.type != TokenType.KEYWORD)
        return false;
    // Dispatch on the keyword id; equivalent to the original || chain.
    switch (token.keyword) {
        case Keyword.BOOL:
        case Keyword.BYTE:
        case Keyword.UBYTE:
        case Keyword.SHORT:
        case Keyword.USHORT:
        case Keyword.INT:
        case Keyword.UINT:
        case Keyword.LONG:
        case Keyword.ULONG:
        case Keyword.CHAR:
        case Keyword.WCHAR:
        case Keyword.DCHAR:
        case Keyword.FLOAT:
        case Keyword.DOUBLE:
        case Keyword.REAL:
        case Keyword.IFLOAT:
        case Keyword.IDOUBLE:
        case Keyword.IREAL:
        case Keyword.CFLOAT:
        case Keyword.CDOUBLE:
        case Keyword.CREAL:
        case Keyword.VOID:
            return true;
        default:
            return false;
    }
}
/**
Single token, one of keywords: bool, byte, ubyte, short, ushort, int, uint, long, ulong,
char, wchar, dchar, float, double, real, ifloat, idouble, ireal, cfloat, cdouble, creal, void
*/
class BasicTypeX : Lexem {
public Token _token; // the basic type keyword token (validated by the in-contract)
public override @property LexemType type() { return LexemType.BASIC_TYPE_X; }
// Wraps a single basic-type keyword token.
public this(Token token)
in {
assert(isBasicTypeXToken(token));
}
body {
_token = token;
}
}
/**
Returns true for one of keywords: const, immutable, inout, shared
*/
bool isTypeCtorToken(Token token) {
    if (token.type != TokenType.KEYWORD)
        return false;
    // Dispatch on the keyword id; equivalent to the original || chain.
    switch (token.keyword) {
        case Keyword.CONST:
        case Keyword.IMMUTABLE:
        case Keyword.INOUT:
        case Keyword.SHARED:
            return true;
        default:
            return false;
    }
}
/**
Single token, one of keywords: const, immutable, inout, shared
*/
class TypeCtor : Lexem {
public Token _token; // the type-constructor keyword token (validated by the in-contract)
public override @property LexemType type() { return LexemType.TYPE_CTOR; }
// Wraps a single type-constructor keyword token.
public this(Token token)
in {
assert(isTypeCtorToken(token));
}
body {
_token = token;
}
}
/**
Zero, one or several keywords: const, immutable, inout, shared
*/
class TypeCtors : Lexem {
public TypeCtor[] _list; // accumulated type constructors, in source order
public override @property LexemType type() { return LexemType.TYPE_CTORS; }
// Starts the list from its first type-constructor token.
public this(Token token)
in {
assert(isTypeCtorToken(token));
}
body {
_list ~= new TypeCtor(token);
}
// Appends one more type-constructor token to the list.
public void append(Token token)
in {
assert(isTypeCtorToken(token));
}
body {
_list ~= new TypeCtor(token);
}
}
/**
Identifier.
*/
class Identifier : Lexem {
// identifier token; cast from Token (in-contract requires TokenType.IDENTIFIER).
// NOTE(review): the cast yields null if the token is not actually an IdentToken
// instance — the contract checks only the type tag; confirm Tokenizer always
// creates IdentToken for IDENTIFIER tokens.
IdentToken _token;
public override @property LexemType type() { return LexemType.IDENTIFIER; }
public this(Token identifier)
in {
assert(identifier.type == TokenType.IDENTIFIER);
}
body {
_token = cast(IdentToken)identifier;
}
}
/**
Identifier list.
IdentifierList:
Identifier
Identifier . IdentifierList
TemplateInstance
TemplateInstance . IdentifierList
*/
class IdentifierList : Lexem {
// Exactly one of _identifier / _templateInstance is set, depending on which
// constructor was used; _identifierList is the optional tail after the dot.
public Identifier _identifier;
public IdentifierList _identifierList;
public TemplateInstance _templateInstance;
public override @property LexemType type() { return LexemType.IDENTIFIER_LIST; }
// Head is a plain identifier token.
public this(Token ident, IdentifierList identifierList = null)
in {
assert(ident.type == TokenType.IDENTIFIER);
}
body {
_identifier = new Identifier(ident);
_identifierList = identifierList;
}
// Head is a template instance.
public this(TemplateInstance templateInstance, IdentifierList identifierList = null)
in {
}
body {
_templateInstance = templateInstance;
_identifierList = identifierList;
}
}
/**
Template instance.
TemplateInstance:
Identifier TemplateArguments
*/
class TemplateInstance : Lexem {
public override @property LexemType type() { return LexemType.TEMPLATE_INSTANCE; }
// Placeholder: parsing of identifier/template arguments is not implemented yet.
public this()
in {
}
body {
}
}
/**
Basic type.
BasicType:
BasicTypeX
. IdentifierList
IdentifierList
Typeof
Typeof . IdentifierList
TypeCtor ( Type )
*/
class BasicType : Lexem {
// Only the fields for the matched grammar alternative are non-null;
// the constructor leaves everything at defaults (population is done externally).
public BasicTypeX _basicTypeX;
public IdentifierList _identifierList;
public Typeof _typeof;
public TypeCtor _typeCtor;
public Type _typeCtorType; // the parenthesized Type in `TypeCtor ( Type )`
public bool _dotBeforeIdentifierList; // true for the `. IdentifierList` form
public override @property LexemType type() { return LexemType.BASIC_TYPE; }
public this()
in {
}
body {
}
}
/**
Typeof.
Typeof:
typeof ( Expression )
typeof ( return )
For typeof(return), _expression is null
*/
class Typeof : Lexem {
public Expression _expression; // null for the typeof(return) form
public override @property LexemType type() { return LexemType.TYPEOF; }
// expression may be null (typeof(return) case, per the comment above).
public this(Expression expression)
in {
}
body {
_expression = expression;
}
}
/**
Type.
*/
class Type : Lexem {
// Components are null until filled in externally; the constructor is a stub.
public TypeCtors _typeCtors;
public BasicType _basicType;
public AltDeclarator _altDeclarator;
public override @property LexemType type() { return LexemType.TYPE; }
public this()
in {
}
body {
}
}
/**
Expression.
Expression:
*/
class Expression : Lexem {
// Placeholder: expression grammar not implemented yet.
public override @property LexemType type() { return LexemType.EXPRESSION; }
public this()
in {
}
body {
}
}
/**
AltDeclarator.
AltDeclarator:
*/
class AltDeclarator : Lexem {
// Placeholder: alternate declarator grammar not implemented yet.
public override @property LexemType type() { return LexemType.ALT_DECLARATOR; }
public this()
in {
}
body {
}
}
// NOTE(review): skeleton — stores the line stream but exposes no lexing
// entry point yet; presumably to be extended to produce Lexem objects.
class Lexer
{
LineStream _lineStream; // source of decoded text lines
this(LineStream lineStream)
{
_lineStream = lineStream;
}
}

View File

@ -0,0 +1,10 @@
module ddc.lexer.LexerException;
// NOTE(review): this class does not extend Exception/Throwable, so it cannot
// be thrown or caught as one — it appears superseded by the LexerException
// declared in ddc.lexer.exceptions; confirm whether this file is still used.
class LexerException
{
this()
{
// Constructor code
}
}

589
src/ddc/lexer/LineStream.d Normal file
View File

@ -0,0 +1,589 @@
module ddc.lexer.LineStream;
import std.stream;
import ddc.lexer.exceptions;
import std.stdio;
import std.conv;
import ddc.lexer.textsource;
/**
Base class for reading decoded text lines from an input stream.

Maintains two buffers: a raw byte buffer (_buf) refilled from the stream,
and a dchar text buffer (_textBuf) filled by encoding-specific decodeText()
overrides. readLine() scans the text buffer for line breaks and returns one
line at a time as a slice of _textBuf.
*/
class LineStream : SourceLines {
public enum EncodingType {
ASCII,
UTF8,
UTF16BE,
UTF16LE,
UTF32BE,
UTF32LE
};
static immutable uint LINE_POSITION_UNDEFINED = uint.max;
static immutable int TEXT_BUFFER_SIZE = 1024;
static immutable int BYTE_BUFFER_SIZE = 512;
static immutable int QUARTER_BYTE_BUFFER_SIZE = BYTE_BUFFER_SIZE / 4;
InputStream _stream;
string _filename;
SourceFile _file;
ubyte[] _buf; // stream reading buffer
uint _pos; // reading position of stream buffer
uint _len; // number of bytes in stream buffer
bool _streamEof; // true if input stream is in EOF state
uint _line; // current line number
uint _textPos; // start of text line in text buffer
uint _textLen; // position of last filled char in text buffer + 1
dchar[] _textBuf; // text buffer
bool _eof; // end of file, no more lines
override @property SourceFile file() { return _file; }
@property string filename() { return _file.filename; }
override @property uint line() { return _line; }
@property EncodingType encoding() { return _encoding; }
override @property int errorCode() { return _errorCode; }
override @property string errorMessage() { return _errorMessage; }
override @property int errorLine() { return _errorLine; }
override @property int errorPos() { return _errorPos; }
immutable EncodingType _encoding;
int _errorCode;
string _errorMessage;
uint _errorLine;
uint _errorPos;
// offset: number of BOM bytes to skip; len: bytes already read into buf by the factory.
protected this(InputStream stream, SourceFile file, EncodingType encoding, ubyte[] buf, uint offset, uint len) {
_file = file;
_stream = stream;
_encoding = encoding;
_buf = buf;
_len = len;
_pos = offset;
_streamEof = _stream.eof;
}
// returns slice of bytes available in buffer
// Refills from the stream only when fewer than QUARTER_BYTE_BUFFER_SIZE bytes
// remain, compacting unconsumed bytes to the buffer start first.
uint readBytes() {
uint bytesLeft = _len - _pos;
if (_streamEof || bytesLeft > QUARTER_BYTE_BUFFER_SIZE)
return bytesLeft;
if (_pos > 0) {
for (uint i = 0; i < bytesLeft; i++)
_buf[i] = _buf[i + _pos];
_len = bytesLeft;
_pos = 0;
}
uint bytesRead = cast(uint)_stream.read(_buf[_len .. BYTE_BUFFER_SIZE]);
_len += bytesRead;
_streamEof = _stream.eof;
return _len - _pos; //_buf[_pos .. _len];
}
// when bytes consumed from byte buffer, call this method to update position
void consumedBytes(uint count) {
_pos += count;
}
// reserve text buffer for specified number of characters, and return pointer to first free character in buffer
// May compact (drop already-consumed chars) and/or grow the buffer; any
// previously returned pointer is invalidated by this call.
dchar * reserveTextBuf(uint len) {
// create new text buffer if necessary
if (_textBuf == null) {
if (len < TEXT_BUFFER_SIZE)
len = TEXT_BUFFER_SIZE;
_textBuf = new dchar[len];
return _textBuf.ptr;
}
uint spaceLeft = cast(uint)_textBuf.length - _textLen;
if (spaceLeft >= len)
return _textBuf.ptr + _textLen;
// move text to beginning of buffer, if necessary
if (_textPos > _textBuf.length / 2) {
uint charCount = _textLen - _textPos;
dchar * p = _textBuf.ptr;
for (uint i = 0; i < charCount; i++)
p[i] = p[i + _textPos];
_textLen = charCount;
_textPos = 0;
}
// resize buffer if necessary
if (_textLen + len > _textBuf.length) {
// resize buffer
uint newsize = cast(uint)_textBuf.length * 2;
if (newsize < _textLen + len)
newsize = _textLen + len;
_textBuf.length = newsize;
}
return _textBuf.ptr + _textLen;
}
// Advances the fill position after a decoder wrote `len` chars via reserveTextBuf().
void appendedText(uint len) {
//writeln("appended ", len, " chars of text"); //:", _textBuf[_textLen .. _textLen + len]);
_textLen += len;
}
// Records an error; once set, readLine() returns null on every call.
void setError(int code, string message, uint errorLine, uint errorPos) {
_errorCode = code;
_errorMessage = message;
_errorLine = errorLine;
_errorPos = errorPos;
}
// override to decode text
// Returns the number of chars appended to the text buffer; 0 means EOF or error.
abstract uint decodeText();
// Reads the next line (without its EOL chars) as a slice of the internal
// text buffer; returns null on EOF or error. Handles CR, LF, CRLF,
// U+2028/U+2029 line breaks and NUL / 0x1A as EOF markers.
// NOTE(review): the returned slice is invalidated by the next readLine()
// call (the buffer may be compacted/reallocated) — confirm callers copy it
// if they keep it.
override public dchar[] readLine() {
if (_errorCode != 0) {
//writeln("error ", _errorCode, ": ", _errorMessage, " in line ", _errorLine);
return null; // error detected
}
if (_eof) {
//writeln("EOF found");
return null;
}
_line++;
uint p = 0;
uint eol = LINE_POSITION_UNDEFINED;
uint eof = LINE_POSITION_UNDEFINED;
uint lastchar = LINE_POSITION_UNDEFINED;
do {
if (_errorCode != 0) {
//writeln("error ", _errorCode, ": ", _errorMessage, " in line ", _errorLine);
return null; // error detected
}
uint charsLeft = _textLen - _textPos;
if (p >= charsLeft) {
uint decodedChars = decodeText();
if (_errorCode != 0) {
return null; // error detected
}
charsLeft = _textLen - _textPos;
if (decodedChars == 0) {
// no more data: treat the rest of the buffer as the last line
eol = charsLeft;
eof = charsLeft;
lastchar = charsLeft;
break;
}
}
// p, eol, eof, lastchar are all offsets relative to _textPos
for (; p < charsLeft; p++) {
dchar ch = _textBuf[_textPos + p];
if (ch == 0x0D) {
lastchar = p;
if (p == charsLeft - 1) {
// need one more char to check if it's 0D0A or just 0D eol
//writeln("read one more char for 0D0A detection");
decodeText();
if (_errorCode != 0) {
return null; // error detected
}
charsLeft = _textLen - _textPos;
}
dchar ch2 = (p < charsLeft - 1) ? _textBuf[_textPos + p + 1] : 0;
if (ch2 == 0x0A)
eol = p + 2;
else
eol = p + 1;
break;
} else if (ch == 0x0A || ch == 0x2028 || ch == 0x2029) {
// single char eoln
lastchar = p;
eol = p + 1;
break;
} else if (ch == 0 || ch == 0x001A) {
// eof
//writeln("EOF char found");
lastchar = p;
eol = eof = p + 1;
break;
}
}
} while (eol == LINE_POSITION_UNDEFINED);
uint lineStart = _textPos;
uint lineEnd = _textPos + lastchar;
_textPos += eol; // consume text
if (eof != LINE_POSITION_UNDEFINED) {
_eof = true;
//writeln("Setting eof flag. lastchar=", lastchar, ", p=", p, ", lineStart=", lineStart);
if (lineStart >= lineEnd) {
//writeln("lineStart >= lineEnd -- treat as eof");
return null; // eof
}
}
// return slice with decoded line
return _textBuf[lineStart .. lineEnd];
}
// factory for string parser
// Prepends a UTF-8 BOM so the stream factory below selects Utf8LineStream.
public static LineStream create(string code, string filename = "") {
uint len = cast(uint)code.length;
ubyte[] data = new ubyte[len + 3];
for (uint i = 0; i < len; i++)
data[i + 3] = code[i];
// BOM for UTF8
data[0] = 0xEF;
data[1] = 0xBB;
data[2] = 0xBF;
MemoryStream stream = new MemoryStream(data);
return create(stream, filename);
}
// factory
// Sniffs the BOM to pick the encoding; defaults to ASCII when none found.
// Returns null if the stream is not open.
public static LineStream create(InputStream stream, string filename) {
ubyte[] buf = new ubyte[BYTE_BUFFER_SIZE];
buf[0] = buf[1] = buf[2] = buf[3] = 0;
if (!stream.isOpen)
return null;
uint len = cast(uint)stream.read(buf);
// order matters: UTF-32LE BOM (FF FE 00 00) must be checked before UTF-16LE (FF FE)
if (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) {
return new Utf8LineStream(stream, filename, buf, len);
} else if (buf[0] == 0x00 && buf[1] == 0x00 && buf[2] == 0xFE && buf[3] == 0xFF) {
return new Utf32beLineStream(stream, filename, buf, len);
} else if (buf[0] == 0xFF && buf[1] == 0xFE && buf[2] == 0x00 && buf[3] == 0x00) {
return new Utf32leLineStream(stream, filename, buf, len);
} else if (buf[0] == 0xFE && buf[1] == 0xFF) {
return new Utf16beLineStream(stream, filename, buf, len);
} else if (buf[0] == 0xFF && buf[1] == 0xFE) {
return new Utf16leLineStream(stream, filename, buf, len);
} else {
return new AsciiLineStream(stream, filename, buf, len);
}
}
// Set by decoders on bad input; the error is reported on the NEXT decodeText()
// call so the current (partial) line is still returned to the caller first.
protected bool invalidCharFlag;
protected void invalidCharError() {
// NOTE(review): column is derived from text-buffer offsets; verify it
// matches the actual character position within the reported line.
uint pos = _textLen - _textPos + 1;
setError(1, "Invalid character in line " ~ to!string(_line) ~ ":" ~ to!string(pos), _line, pos);
}
}
/// Decoder for plain 7-bit ASCII input (no BOM, offset 0).
class AsciiLineStream : LineStream {
    this(InputStream stream, string filename, ubyte[] buf, uint len) {
        super(stream, new SourceFile(filename), EncodingType.ASCII, buf, 0, len);
    }
    /// Copies buffered bytes into the text buffer one-to-one.
    /// Returns the number of chars actually decoded; 0 on EOF or pending error.
    override uint decodeText() {
        if (invalidCharFlag) {
            // error deferred from the previous call so the last good line was returned
            invalidCharError();
            return 0;
        }
        uint bytesAvailable = readBytes();
        if (bytesAvailable == 0)
            return 0; // nothing to decode
        ubyte* b = _buf.ptr + _pos;
        dchar* text = reserveTextBuf(bytesAvailable);
        uint i = 0;
        for (; i < bytesAvailable; i++) {
            ubyte ch = b[i];
            if (ch & 0x80) {
                // invalid character: flag it, report on the next call
                invalidCharFlag = true;
                break;
            }
            text[i] = ch;
        }
        consumedBytes(i);
        appendedText(i);
        // Fix: return the count of chars actually appended. The original
        // returned `len` (all buffered bytes) even when an invalid byte
        // stopped the loop early, over-reporting the decoded amount to
        // readLine().
        return i;
    }
}
/// Decoder for UTF-8 input (offset 3 skips the BOM found by the factory).
class Utf8LineStream : LineStream {
    this(InputStream stream, string filename, ubyte[] buf, uint len) {
        super(stream, new SourceFile(filename), EncodingType.UTF8, buf, 3, len);
    }
    /// Decodes as many complete UTF-8 sequences as are buffered.
    /// Returns the number of code units appended; 0 on EOF or pending error.
    override uint decodeText() {
        if (invalidCharFlag) {
            // error deferred from the previous call so the last good line was returned
            invalidCharError();
            return 0;
        }
        uint bytesAvailable = readBytes();
        if (bytesAvailable == 0)
            return 0; // nothing to decode
        uint len = bytesAvailable;
        uint chars = 0;
        ubyte* b = _buf.ptr + _pos;
        dchar* text = reserveTextBuf(len);
        uint i = 0;
        for (; i < len; i++) {
            uint ch = 0;
            uint ch0 = b[i];
            uint bleft = len - i;
            uint bread = 0;
            // Fix: these branches were independent `if`s with no final `else`.
            // A stray continuation byte (0x80..0xBF) or an invalid 0xFE/0xFF
            // lead matched none of them, leaving bread == 0, so `i += bread - 1`
            // stalled the loop and wrote U+0000 past the reserved buffer forever.
            if (!(ch0 & 0x80)) {
                // 0x00..0x7F single byte
                ch = ch0;
                bread = 1;
            } else if ((ch0 & 0xE0) == 0xC0) {
                // two bytes 110xxxxx 10xxxxxx
                if (bleft < 2)
                    break; // incomplete sequence: leave bytes for the next call
                uint ch1 = b[i + 1];
                if ((ch1 & 0xC0) != 0x80) {
                    invalidCharFlag = true;
                    break;
                }
                ch = ((ch0 & 0x1F) << 6) | (ch1 & 0x3F);
                bread = 2;
            } else if ((ch0 & 0xF0) == 0xE0) {
                // three bytes 1110xxxx 10xxxxxx 10xxxxxx
                if (bleft < 3)
                    break;
                uint ch1 = b[i + 1];
                uint ch2 = b[i + 2];
                if ((ch1 & 0xC0) != 0x80 || (ch2 & 0xC0) != 0x80) {
                    invalidCharFlag = true;
                    break;
                }
                // Fix: ch1 was masked with 0x1F, dropping one payload bit of
                // the first continuation byte (mis-decoded e.g. U+4E2D);
                // continuation bytes carry 6 bits, mask 0x3F.
                ch = ((ch0 & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F);
                bread = 3;
            } else if ((ch0 & 0xF8) == 0xF0) {
                // four bytes 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                if (bleft < 4)
                    break;
                uint ch1 = b[i + 1];
                uint ch2 = b[i + 2];
                uint ch3 = b[i + 3];
                if ((ch1 & 0xC0) != 0x80 || (ch2 & 0xC0) != 0x80 || (ch3 & 0xC0) != 0x80) {
                    invalidCharFlag = true;
                    break;
                }
                ch = ((ch0 & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | (ch3 & 0x3F);
                bread = 4;
            } else if ((ch0 & 0xFC) == 0xF8) {
                // five bytes 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                // (obsolete form; always > U+10FFFF, rejected by the range check below)
                if (bleft < 5)
                    break;
                uint ch1 = b[i + 1];
                uint ch2 = b[i + 2];
                uint ch3 = b[i + 3];
                uint ch4 = b[i + 4];
                if ((ch1 & 0xC0) != 0x80 || (ch2 & 0xC0) != 0x80 || (ch3 & 0xC0) != 0x80 || (ch4 & 0xC0) != 0x80) {
                    invalidCharFlag = true;
                    break;
                }
                ch = ((ch0 & 0x03) << 24) | ((ch1 & 0x3F) << 18) | ((ch2 & 0x3F) << 12) | ((ch3 & 0x3F) << 6) | (ch4 & 0x3F);
                bread = 5;
            } else if ((ch0 & 0xFE) == 0xFC) {
                // six bytes 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                if (bleft < 6)
                    break;
                uint ch1 = b[i + 1];
                uint ch2 = b[i + 2];
                uint ch3 = b[i + 3];
                uint ch4 = b[i + 4];
                uint ch5 = b[i + 5];
                if ((ch1 & 0xC0) != 0x80 || (ch2 & 0xC0) != 0x80 || (ch3 & 0xC0) != 0x80 || (ch4 & 0xC0) != 0x80 || (ch5 & 0xC0) != 0x80) {
                    invalidCharFlag = true;
                    break;
                }
                ch = ((ch0 & 0x01) << 30) | ((ch1 & 0x3F) << 24) | ((ch2 & 0x3F) << 18) | ((ch3 & 0x3F) << 12) | ((ch4 & 0x3F) << 6) | (ch5 & 0x3F);
                // Fix: was `bread = 5`, which re-consumed the sixth byte as a
                // new (invalid) lead byte.
                bread = 6;
            } else {
                // stray continuation byte or invalid 0xFE/0xFF lead
                invalidCharFlag = true;
                break;
            }
            // reject surrogate code points and values beyond the Unicode range
            if ((ch >= 0xd800 && ch < 0xe000) || (ch > 0x10FFFF)) {
                invalidCharFlag = true;
                break;
            }
            if (ch < 0x10000) {
                text[chars++] = ch;
            } else {
                // NOTE(review): stores a UTF-16 surrogate pair into the dchar
                // buffer (matches original intent); confirm downstream consumers
                // expect UTF-16-style pairs rather than raw UTF-32 values.
                // Fix: the high surrogate was formed without subtracting the
                // 0x10000 bias (e.g. U+10000 encoded as D840/DC00 instead of
                // D800/DC00).
                uint v = ch - 0x10000;
                text[chars++] = (0xd800 | (v >> 10));
                text[chars++] = (0xdc00 | (v & 0x3FF));
            }
            i += bread - 1; // the for-increment consumes the lead byte itself
        }
        consumedBytes(i);
        appendedText(chars);
        uint bleft = len - i;
        if (_streamEof && bleft > 0)
            invalidCharFlag = true; // incomplete character at end of stream
        return chars;
    }
}
/// Decoder for UTF-16 big-endian input (offset 2 skips the BOM).
class Utf16beLineStream : LineStream {
    this(InputStream stream, string filename, ubyte[] buf, uint len) {
        super(stream, new SourceFile(filename), EncodingType.UTF16BE, buf, 2, len);
    }
    /// Converts complete big-endian 16-bit units from the byte buffer into
    /// the text buffer; a trailing odd byte stays buffered for the next call.
    override uint decodeText() {
        if (invalidCharFlag) {
            invalidCharError();
            return 0;
        }
        uint total = readBytes();
        if (total == 0)
            return 0; // nothing to decode
        ubyte* src = _buf.ptr + _pos;
        dchar* text = reserveTextBuf(total / 2 + 1);
        uint chars = 0;
        uint i = 0;
        while (i + 1 < total) {
            // big-endian: high byte first
            uint code = (src[i] << 8) | src[i + 1];
            // TODO: check special cases (surrogate pairs are passed through undecoded)
            text[chars++] = code;
            i += 2;
        }
        consumedBytes(i);
        appendedText(chars);
        if (_streamEof && i < total)
            invalidCharFlag = true; // incomplete character at end of stream
        return chars;
    }
}
/// Decoder for UTF-16 little-endian input (offset 2 skips the BOM).
class Utf16leLineStream : LineStream {
    this(InputStream stream, string filename, ubyte[] buf, uint len) {
        super(stream, new SourceFile(filename), EncodingType.UTF16LE, buf, 2, len);
    }
    /// Converts complete little-endian 16-bit units from the byte buffer into
    /// the text buffer; a trailing odd byte stays buffered for the next call.
    override uint decodeText() {
        if (invalidCharFlag) {
            invalidCharError();
            return 0;
        }
        uint total = readBytes();
        if (total == 0)
            return 0; // nothing to decode
        ubyte* src = _buf.ptr + _pos;
        dchar* text = reserveTextBuf(total / 2 + 1);
        uint chars = 0;
        uint i = 0;
        while (i + 1 < total) {
            // little-endian: low byte first
            uint code = (src[i + 1] << 8) | src[i];
            // TODO: check special cases (surrogate pairs are passed through undecoded)
            text[chars++] = code;
            i += 2;
        }
        consumedBytes(i);
        appendedText(chars);
        if (_streamEof && i < total)
            invalidCharFlag = true; // incomplete character at end of stream
        return chars;
    }
}
/// Decoder for UTF-32 big-endian input (offset 4 skips the BOM).
class Utf32beLineStream : LineStream {
    this(InputStream stream, string filename, ubyte[] buf, uint len) {
        super(stream, new SourceFile(filename), EncodingType.UTF32BE, buf, 4, len);
    }
    /// Converts complete big-endian 32-bit units into the text buffer;
    /// trailing partial units stay buffered for the next call.
    override uint decodeText() {
        if (invalidCharFlag) {
            // error deferred from the previous call so the last good line was returned
            invalidCharError();
            return 0;
        }
        uint bytesAvailable = readBytes();
        if (bytesAvailable == 0)
            return 0; // nothing to decode
        uint len = bytesAvailable;
        ubyte* b = _buf.ptr + _pos;
        // one dchar per 4 bytes is sufficient (original over-reserved len/2+1)
        dchar* text = reserveTextBuf(len / 4 + 1);
        uint chars = 0;
        uint i = 0;
        // Fix: the original condition `i < len - 3` underflows for len < 4
        // (uint arithmetic), making the loop read past the available bytes and
        // over-consume; `i + 4 <= len` is overflow-safe and equivalent otherwise.
        for (; i + 4 <= len; i += 4) {
            uint ch = (b[i] << 24) | (b[i + 1] << 16) | (b[i + 2] << 8) | b[i + 3];
            // reject surrogate code points and values beyond the Unicode range
            if ((ch >= 0xd800 && ch < 0xe000) || (ch > 0x10FFFF)) {
                invalidCharFlag = true;
                break;
            }
            text[chars++] = ch;
        }
        consumedBytes(i);
        appendedText(chars);
        uint bleft = len - i;
        if (_streamEof && bleft > 0)
            invalidCharFlag = true; // incomplete character at end of stream
        return chars;
    }
}
/// Decoder for UTF-32 little-endian input (offset 4 skips the BOM).
class Utf32leLineStream : LineStream {
    this(InputStream stream, string filename, ubyte[] buf, uint len) {
        super(stream, new SourceFile(filename), EncodingType.UTF32LE, buf, 4, len);
    }
    /// Converts complete little-endian 32-bit units into the text buffer;
    /// trailing partial units stay buffered for the next call.
    override uint decodeText() {
        if (invalidCharFlag) {
            // error deferred from the previous call so the last good line was returned
            invalidCharError();
            return 0;
        }
        uint bytesAvailable = readBytes();
        if (bytesAvailable == 0)
            return 0; // nothing to decode
        uint len = bytesAvailable;
        ubyte* b = _buf.ptr + _pos;
        // one dchar per 4 bytes is sufficient (original over-reserved len/2+1)
        dchar* text = reserveTextBuf(len / 4 + 1);
        uint chars = 0;
        uint i = 0;
        // Fix: the original condition `i < len - 3` underflows for len < 4
        // (uint arithmetic), making the loop read past the available bytes and
        // over-consume; `i + 4 <= len` is overflow-safe and equivalent otherwise.
        for (; i + 4 <= len; i += 4) {
            // little-endian: least significant byte first
            uint ch = b[i] | (b[i + 1] << 8) | (b[i + 2] << 16) | (b[i + 3] << 24);
            // reject surrogate code points and values beyond the Unicode range
            if ((ch >= 0xd800 && ch < 0xe000) || (ch > 0x10FFFF)) {
                invalidCharFlag = true;
                break;
            }
            text[chars++] = ch;
        }
        consumedBytes(i);
        appendedText(chars);
        uint bleft = len - i;
        if (_streamEof && bleft > 0)
            invalidCharFlag = true; // incomplete character at end of stream
        return chars;
    }
}
// Manual smoke test for LineStream decoding against developer-local sample
// files; disabled via `static if (false)` because the paths are machine-specific.
unittest {
static if (false) {
import std.stdio;
import std.conv;
import std.utf;
//string fname = "C:\\projects\\d\\ddc\\ddclexer\\src\\ddc\\lexer\\LineStream.d";
//string fname = "/home/lve/src/d/ddc/ddclexer/" ~ __FILE__; //"/home/lve/src/d/ddc/ddclexer/src/ddc/lexer/Lexer.d";
//string fname = "/home/lve/src/d/ddc/ddclexer/tests/LineStream_utf8.d";
//string fname = "/home/lve/src/d/ddc/ddclexer/tests/LineStream_utf16be.d";
//string fname = "/home/lve/src/d/ddc/ddclexer/tests/LineStream_utf16le.d";
//string fname = "/home/lve/src/d/ddc/ddclexer/tests/LineStream_utf32be.d";
string fname = "/home/lve/src/d/ddc/ddclexer/tests/LineStream_utf32le.d";
writeln("opening file");
std.stream.File f = new std.stream.File(fname);
scope(exit) { f.close(); }
try {
// read all lines, printing each; stops on null (EOF or error)
LineStream lines = LineStream.create(f, fname);
for (;;) {
dchar[] s = lines.readLine();
if (s is null)
break;
writeln("line " ~ to!string(lines.line()) ~ ":" ~ toUTF8(s));
}
if (lines.errorCode != 0) {
writeln("Error ", lines.errorCode, " ", lines.errorMessage, " -- at line ", lines.errorLine, " position ", lines.errorPos);
} else {
writeln("EOF reached");
}
} catch (Exception e) {
writeln("Exception " ~ e.toString);
}
}
}
// LAST LINE

View File

@ -0,0 +1,10 @@
module ddc.lexer.SourceEncodingException;
// NOTE(review): appears superseded by the SourceEncodingException declared in
// ddc.lexer.exceptions (which carries file/line/position); confirm whether
// this standalone file is still used.
class SourceEncodingException : Exception
{
this(string msg)
{
super(msg);
}
}

2636
src/ddc/lexer/Tokenizer.d Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,32 @@
module ddc.lexer.exceptions;
import std.conv;
/// Base exception for parsing errors; carries message, file name and
/// 1-based-style line/column info and formats them into the Exception message.
class ParserException : Exception {
string _msg; // raw message without location suffix
string _filename; // source file the error occurred in
size_t _line; // line of the error
size_t _pos; // column of the error
public @property size_t line() { return _line; }
this(string msg, string filename, size_t line, size_t pos) {
super(msg ~ " at " ~ filename ~ " line " ~ to!string(line) ~ " column " ~ to!string(pos));
_msg = msg;
_filename = filename;
_line = line;
_pos = pos;
}
}
/// Exception raised for lexical (tokenization) errors.
class LexerException : ParserException {
this(string msg, string filename, size_t line, size_t pos) {
super(msg, filename, line, pos);
}
}
/// Exception raised when source bytes cannot be decoded in the detected encoding.
class SourceEncodingException : LexerException {
this(string msg, string filename, size_t line, size_t pos) {
super(msg, filename, line, pos);
}
}

103
src/ddc/lexer/textsource.d Normal file
View File

@ -0,0 +1,103 @@
module ddc.lexer.textsource;
private import std.utf;
private import std.array;
/**
 * Source file information.
 * Even if contains only file name, it's better to use it instead of string - object reference size is twice less than array ref.
 */
class SourceFile {
protected string _file; // file name (path) of the source file
/// File name this object identifies.
public @property string filename() { return _file; }
public this(string filename) {
_file = filename;
}
}
/// source lines for tokenizer
/// Implemented by LineStream (stream-based) and ArraySourceLines (in-memory).
interface SourceLines {
/// source file
@property SourceFile file();
/// last read line
@property uint line();
/// source encoding
//@property EncodingType encoding() { return _encoding; }
/// error code (0 means no error)
@property int errorCode();
/// error message
@property string errorMessage();
/// error line
@property int errorLine();
/// error position
@property int errorPos();
/// read line, return null if EOF reached or error occured
dchar[] readLine();
}
/// Simple text source based on array
class ArraySourceLines : SourceLines {
protected SourceFile _file;
protected uint _line; // index of the next line to return
protected uint _firstLine; // offset of _lines[0] within the original document
protected dstring[] _lines;
// shared placeholder returned for null entries in _lines;
// NOTE(review): mutable and shared across all instances — callers must not
// modify returned lines in place.
static protected dchar[] _emptyLine = ""d.dup;
this() {
}
this(dstring[] lines, SourceFile file, uint firstLine = 0) {
init(lines, file, firstLine);
}
this(string code, string filename) {
_lines = (toUTF32(code)).split("\n");
_file = new SourceFile(filename);
}
// Releases references and resets positions; the object may be init()-ed again.
void close() {
_lines = null;
_line = 0;
_firstLine = 0;
_file = null;
}
// (Re)binds this reader to a slice of lines; resets the read position.
void init(dstring[] lines, SourceFile file, uint firstLine = 0) {
_lines = lines;
_firstLine = firstLine;
_line = 0;
_file = file;
}
// Repositions to the given line index; always reports success.
// NOTE(review): parameter is int but _line is uint — a negative argument
// would wrap; confirm callers only pass non-negative values.
bool reset(int line) {
_line = line;
return true;
}
/// source file
override @property SourceFile file() { return _file; }
/// last read line
override @property uint line() { return _line; }
/// source encoding
//@property EncodingType encoding() { return _encoding; }
/// error code (always 0: in-memory source cannot fail)
override @property int errorCode() { return 0; }
/// error message
override @property string errorMessage() { return ""; }
/// error line
override @property int errorLine() { return 0; }
/// error position
override @property int errorPos() { return 0; }
/// read line, return null if EOF reached or error occured
override dchar[] readLine() {
if (_line < _lines.length) {
if (_lines[_line])
// NOTE(review): casts immutable dstring to dchar[] without copying;
// mutating the result would violate immutability — confirm callers
// treat returned lines as read-only.
return cast(dchar[])_lines[_line++];
_line++;
return _emptyLine; // null entry: report an empty (non-null) line
}
return null; // EOF
}
}

View File

@ -17,8 +17,84 @@ import dlangide.ui.wspanel;
import dlangide.workspace.workspace;
import dlangide.workspace.project;
import ddc.lexer.textsource;
import ddc.lexer.exceptions;
import ddc.lexer.Tokenizer;
import std.conv;
import std.utf;
import std.algorithm;
/// Syntax highlighter for D source: tokenizes the buffer and paints
/// per-character token categories into the props arrays.
class SimpleDSyntaxHighlighter : SyntaxHighlighter {
SourceFile _file;
ArraySourceLines _lines; // adapter feeding editor lines to the tokenizer
Tokenizer _tokenizer;
this (string filename) {
_file = new SourceFile(filename);
_lines = new ArraySourceLines();
_tokenizer = new Tokenizer(_lines);
}
TokenPropString[] _props; // per-line, per-character category bytes (borrowed during updateHighlight)
/// categorize characters in content by token types
void updateHighlight(dstring[] lines, TokenPropString[] props, int changeStartLine, int changeEndLine) {
_props = props;
// incremental range is ignored: the whole document is re-highlighted every time
changeStartLine = 0;
changeEndLine = lines.length;
_lines.init(lines[changeStartLine..$], _file, changeStartLine);
_tokenizer.init(_lines);
// position/line of the PREVIOUS token; its category is painted once the
// next token reveals where the previous one ended
uint tokenPos = 0;
uint tokenLine = 0;
ubyte category = 0;
for (;;) {
Token token = _tokenizer.nextToken();
if (token is null) {
//writeln("Null token returned");
break;
}
if (token.type == TokenType.EOF) {
//writeln("EOF token");
break;
}
uint newPos = token.pos;
uint newLine = token.line;
if (category) {
// fill with category
// NOTE(review): token.line/pos appear to be 1-based (the `- 1`
// rebasing below); on the first painted token tokenLine is from the
// previous iteration so `tokenLine - 1` cannot underflow only if
// tokens always report line >= 1 — confirm Tokenizer guarantees that.
for (uint i = tokenLine - 1; i <= newLine - 1; i++) {
uint start = i > tokenLine - 1 ? 0 : tokenPos;
uint end = i < newLine - 1 ? lines[i].length : tokenPos;
for (uint j = start; j < end; j++) {
assert(i < _props.length);
// NOTE(review): when j == 0, `j - 1` wraps to uint.max and the
// guard skips the write, so column 0 is never painted —
// confirm whether this off-by-one is intentional (1-based pos?).
if (j - 1 < _props[i].length)
_props[i][j - 1] = category;
}
}
}
TokenType t = token.type;
// handle token
// map token type to a display category; unknown types clear the category
if (t == TokenType.COMMENT) {
category = TokenCategory.Comment;
} else if (t == TokenType.KEYWORD) {
category = TokenCategory.Keyword;
} else if (t == TokenType.IDENTIFIER) {
category = TokenCategory.Identifier;
} else if (t == TokenType.STRING) {
category = TokenCategory.String;
} else {
category = 0;
}
tokenPos = newPos;
tokenLine= newLine;
}
_lines.close();
_props = null; // drop the borrowed reference
}
}
/// DIDE source file editor
class DSourceEdit : SourceEdit {
@ -26,6 +102,10 @@ class DSourceEdit : SourceEdit {
super(ID);
styleId = null;
backgroundColor = 0xFFFFFF;
setTokenHightlightColor(TokenCategory.Comment, 0x808080); // gray
setTokenHightlightColor(TokenCategory.Keyword, 0x0020C0); // blue
setTokenHightlightColor(TokenCategory.String, 0xC02000); // red
setTokenHightlightColor(TokenCategory.Identifier, 0x206000); // green
}
this() {
this("SRCEDIT");
@ -34,8 +114,20 @@ class DSourceEdit : SourceEdit {
@property ProjectSourceFile projectSourceFile() { return _projectSourceFile; }
/// load by filename
override bool load(string fn) {
return super.load(fn);
_projectSourceFile = null;
bool res = super.load(fn);
setHighlighter();
return res;
}
void setHighlighter() {
if (filename.endsWith(".d") || filename.endsWith(".dd") || filename.endsWith(".dh") || filename.endsWith(".ddoc")) {
content.syntaxHighlighter = new SimpleDSyntaxHighlighter(filename);
} else {
content.syntaxHighlighter = null;
}
}
/// load by project item
bool load(ProjectSourceFile f) {
if (!load(f.filename)) {
@ -43,6 +135,7 @@ class DSourceEdit : SourceEdit {
return false;
}
_projectSourceFile = f;
setHighlighter();
return true;
}
}